Skip to content

maintenance

logger = get_logger() module-attribute

update_dataset_info(dataset_id)

Source code in datasets/tasks/maintenance.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@shared_task()
def update_dataset_info(dataset_id: UUID):
    dataset = Dataset.objects.get(id=dataset_id)

    match dataset.mode:
        case Dataset.Mode.LOCAL.value:
            if dataset.local_database is None:
                raise Exception("Dataset has no database")

            database = dataset.local_database
            with StardogApi.admin() as admin:
                database_api = admin.database(database)

                logger.info('Retrieving namespace info')
                namespaces = database_api.namespaces()

            with StardogApi.connection(database) as conn:
                logger.info('Getting number of triples')
                triple_count = conn.size(exact=False)
        case Dataset.Mode.SPARQL.value:
            if dataset.sparql_endpoint is None:
                raise Exception("Dataset has no database")

            namespaces = []
            triple_count = int(dataset.get_query_service().query_select('''
                SELECT (COUNT(*) AS ?count)
                WHERE { ?s ?p ?o }
            ''', limit=1, ignore_limit=True).get('results').get('bindings')[0].get('count').get('value'))
        case _:
            raise Exception(f"Unsupported mode {dataset.mode}")

    # path = ['results', 'bindings', 0, 'count', 'value']
    # ATOM_QUERY = 'SELECT (COUNT(DISTINCT {}) as ?count) {{ ?s ?p ?o }}'
    # subject_count = deepget(client.query(database, ATOM_QUERY.format("?s")), path, default=-1)
    # predicate_count = deepget(client.query(database, ATOM_QUERY.format("?p")), path, default=-1)
    # object_count = deepget(client.query(database, ATOM_QUERY.format("?o")), path, default=-1)

    logger.info('Saving dataset info')
    dataset = Dataset.objects.get(id=dataset_id)
    dataset.namespaces = namespaces
    dataset.statistics = {
        **(dataset.statistics or {}),
        'triple_count': triple_count,
    }
    dataset.save()
    logger.info('Successfully updated dataset info')