Skip to content

models

Dataset

Bases: TaskMixin, TimeStampMixin, OwnableMixin

The internal dataset model.

Source code in datasets/models.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class Dataset(TaskMixin, TimeStampMixin, OwnableMixin):
    """
    The internal dataset model.
    """
    STATES = ((state.value, state.value) for state in DatasetState)

    class Mode(models.TextChoices):
        """
        The Mode class is an enumeration of the possible modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        SPARQL = 'SPARQL', _('From SPARQL endpoint')

    class SearchMode(models.TextChoices):
        """
        The SearchMode class is an enumeration of the possible search modes of a dataset
        """
        LOCAL = 'LOCAL', _('Imported locally ')
        WIKIDATA = 'WIKIDATA', _('From Wikidata')
        TRIPLYDB = 'TRIPLYDB', _('From TripyDB')

    id = models.UUIDField(default=uuid.uuid4, primary_key=True)
    """The identifier of the dataset."""
    name = models.CharField(max_length=255)
    """The name of the dataset."""
    description = models.TextField(blank=True)
    """The description of the dataset."""
    source = models.JSONField()
    """The source of the dataset."""
    mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL)
    """The mode of the dataset."""
    search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL)
    """The search mode of the dataset."""
    creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True)
    """The user who created the dataset."""

    local_database = models.CharField(max_length=255, null=True)
    """The local stardog database identifier of the dataset."""
    sparql_endpoint = models.CharField(max_length=255, null=True)
    """The SPARQL endpoint of the dataset."""

    statistics = models.JSONField(null=True)
    """The statistics of the dataset."""
    namespaces = models.JSONField(null=True)
    """The list of sparql namespaces/prefixes in the dataset."""
    state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255)
    """The import state of the dataset."""
    import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True)
    """The import task of the dataset."""

    objects = models.Manager()

    @property
    def search_index_path(self):
        """
        The path to the search index of the dataset.
        :return:
        """
        return DATA_DIR / f'search_index_{self.local_database}' if self.local_database else None

    def get_search_service(self) -> SearchService:
        """
        Return appropriate search service depending on the search mode
        """
        match self.search_mode:
            case self.SearchMode.LOCAL:
                if not self.search_index_path.exists():
                    raise Exception('Dataset search index has not been created yet')
                return LocalSearchService(self.search_index_path)
            case self.SearchMode.WIKIDATA:
                return WikidataSearchService()
            case self.SearchMode.TRIPLYDB:
                if 'tdb_id' not in self.source:
                    raise Exception('Dataset is not a TriplyDB dataset')
                return TriplyDBSearchService(self.source['tdb_id'])
            case _:
                raise ValueError(f'Unknown search mode {self.search_mode}')

    def get_query_service(self) -> QueryService:
        """
        If the mode is local, return a local query service, otherwise return a SPARQL query service
        """
        match self.mode:
            case self.Mode.LOCAL:
                if not self.local_database:
                    raise Exception('Dataset local database has not been imported yet')
                return LocalQueryService(str(self.local_database))
            case self.Mode.SPARQL:
                return SPARQLQueryService(str(self.sparql_endpoint))
            case _:
                raise ValueError(f'Unknown mode {self.mode}')

    def can_view(self, user: User):
        return bool(user)

    def can_edit(self, user: User):
        return super().can_edit(user) or self.creator == user

STATES = (state.value, state.value) for state in DatasetState class-attribute

creator = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True) class-attribute

The user who created the dataset.

description = models.TextField(blank=True) class-attribute

The description of the dataset.

id = models.UUIDField(default=uuid.uuid4, primary_key=True) class-attribute

The identifier of the dataset.

import_task = models.OneToOneField('tasks.Task', on_delete=models.SET_NULL, null=True) class-attribute

The import task of the dataset.

local_database = models.CharField(max_length=255, null=True) class-attribute

The local Stardog database identifier of the dataset.

mode = models.CharField(max_length=255, choices=Mode.choices, default=Mode.LOCAL) class-attribute

The mode of the dataset.

name = models.CharField(max_length=255) class-attribute

The name of the dataset.

namespaces = models.JSONField(null=True) class-attribute

The list of SPARQL namespaces/prefixes in the dataset.

objects = models.Manager() class-attribute

search_mode = models.CharField(max_length=255, choices=SearchMode.choices, default=SearchMode.LOCAL) class-attribute

The search mode of the dataset.

source = models.JSONField() class-attribute

The source of the dataset.

sparql_endpoint = models.CharField(max_length=255, null=True) class-attribute

The SPARQL endpoint of the dataset.

state = models.CharField(choices=STATES, default=DatasetState.QUEUED.value, max_length=255) class-attribute

The import state of the dataset.

statistics = models.JSONField(null=True) class-attribute

The statistics of the dataset.

Mode

Bases: models.TextChoices

The Mode class is an enumeration of the possible modes of a dataset

Source code in datasets/models.py
33
34
35
36
37
38
class Mode(models.TextChoices):
    """
    Enumeration of the possible modes of a dataset.

    Each member is declared as a ``(value, label)`` pair; the label is wrapped
    in ``_()`` (presumably a gettext alias -- confirm against the imports).
    """
    # NOTE: the trailing space inside the label below is part of the runtime string.
    LOCAL = 'LOCAL', _('Imported locally ')
    SPARQL = 'SPARQL', _('From SPARQL endpoint')

LOCAL = ('LOCAL', _('Imported locally ')) class-attribute

SPARQL = ('SPARQL', _('From SPARQL endpoint')) class-attribute

SearchMode

Bases: models.TextChoices

The SearchMode class is an enumeration of the possible search modes of a dataset

Source code in datasets/models.py
40
41
42
43
44
45
46
class SearchMode(models.TextChoices):
    """
    Enumeration of the possible search modes of a dataset.

    Each member is declared as a ``(value, label)`` pair; the label is wrapped
    in ``_()`` (presumably a gettext alias -- confirm against the imports).
    """
    # NOTE: the trailing space inside the label below is part of the runtime string.
    LOCAL = 'LOCAL', _('Imported locally ')
    WIKIDATA = 'WIKIDATA', _('From Wikidata')
    # NOTE(review): label reads 'TripyDB' -- probably a typo for 'TriplyDB'; confirm before changing.
    TRIPLYDB = 'TRIPLYDB', _('From TripyDB')

LOCAL = ('LOCAL', _('Imported locally ')) class-attribute

TRIPLYDB = ('TRIPLYDB', _('From TripyDB')) class-attribute

WIKIDATA = ('WIKIDATA', _('From Wikidata')) class-attribute

can_edit(user)

Source code in datasets/models.py
122
123
def can_edit(self, user: User):
    """
    Tell whether ``user`` may edit this dataset.

    Editing is allowed when the parent classes' ``can_edit`` allows it, or
    when ``user`` is the dataset's creator.
    """
    # A dataset without a creator must not match a ``user`` of None:
    # ``None == None`` is True and would otherwise grant edit rights.
    return super().can_edit(user) or (self.creator is not None and self.creator == user)

can_view(user)

Source code in datasets/models.py
119
120
def can_view(self, user: User):
    """
    Tell whether ``user`` may view this dataset.

    Any truthy ``user`` is allowed; a falsy one (e.g. ``None``) is not.
    """
    return True if user else False

get_query_service()

If the mode is local, return a local query service, otherwise return a SPARQL query service

Source code in datasets/models.py
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_query_service(self) -> QueryService:
    """
    If the mode is local, return a local query service, otherwise return a SPARQL query service
    """
    match self.mode:
        case self.Mode.LOCAL:
            if not self.local_database:
                raise Exception('Dataset local database has not been imported yet')
            return LocalQueryService(str(self.local_database))
        case self.Mode.SPARQL:
            return SPARQLQueryService(str(self.sparql_endpoint))
        case _:
            raise ValueError(f'Unknown mode {self.mode}')

get_search_service()

Return appropriate search service depending on the search mode

Source code in datasets/models.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def get_search_service(self) -> SearchService:
    """
    Return appropriate search service depending on the search mode
    """
    match self.search_mode:
        case self.SearchMode.LOCAL:
            if not self.search_index_path.exists():
                raise Exception('Dataset search index has not been created yet')
            return LocalSearchService(self.search_index_path)
        case self.SearchMode.WIKIDATA:
            return WikidataSearchService()
        case self.SearchMode.TRIPLYDB:
            if 'tdb_id' not in self.source:
                raise Exception('Dataset is not a TriplyDB dataset')
            return TriplyDBSearchService(self.source['tdb_id'])
        case _:
            raise ValueError(f'Unknown search mode {self.search_mode}')

search_index_path() property

The path to the search index of the dataset, or None when the dataset has no local database.

Source code in datasets/models.py
79
80
81
82
83
84
85
@property
def search_index_path(self):
    """
    The path to the search index of the dataset.

    :return: ``DATA_DIR / 'search_index_<local_database>'``, or ``None``
        when ``local_database`` is not set.
    """
    if not self.local_database:
        return None
    return DATA_DIR / f'search_index_{self.local_database}'

DatasetState

Bases: Enum

The DatasetState class is an enumeration of the possible states of a dataset

Source code in datasets/models.py
17
18
19
20
21
22
23
24
class DatasetState(Enum):
    """
    Enumeration of the possible import states of a dataset.

    Every member's value is the same string as its name.
    """
    QUEUED = 'QUEUED'  # used as the default of ``Dataset.state``
    IMPORTING = 'IMPORTING'
    IMPORTED = 'IMPORTED'
    FAILED = 'FAILED'

FAILED = 'FAILED' class-attribute

IMPORTED = 'IMPORTED' class-attribute

IMPORTING = 'IMPORTING' class-attribute

QUEUED = 'QUEUED' class-attribute