Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add subject filter #38

Merged
merged 1 commit into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions jest.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ export default {
coverageReporters: ['json-summary', 'text'],
coverageThreshold: {
global: {
lines: 20.8,
statements: 20.8,
branches: 11.9,
lines: 21.91,
statements: 21.91,
branches: 12.76,
functions: 21.05,
},
},
Expand Down
4,392 changes: 1,887 additions & 2,505 deletions package-lock.json

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,16 @@
"n3": "^1.17.2",
"pino": "^8.16.2",
"rdf-data-factory": "^1.1.2",
"rdf-ext": "^2.4.0",
"rdf-dereference": "^2.2.0",
"rdf-ext": "^2.5.0",
"rdf-js": "^4.0.2"
},
"devDependencies": {
"@rdfjs/types": "^1.1.0",
"@types/jest": "^28.1.8",
"@types/n3": "^1.16.4",
"@types/node": "^18.18.12",
"@types/rdf-ext": "^2.2.5",
"gts": "^5.2.0",
"jest": "^29.7.0",
"jest-coverage-thresholds-bumper": "^1.1.0",
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/class-partition.rq
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CONSTRUCT {
] .
} WHERE {
SELECT (COUNT(?type) AS ?entities) ?type {
#subjectFilter#
?s a ?type .
}
GROUP BY ?type
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/class-properties.rq
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ CONSTRUCT {
] .
} WHERE {
SELECT ?p ?t (COUNT(DISTINCT ?s) AS ?subjects) {
#subjectFilter#
?s a ?t ;
?p ?o .
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/entity-properties.rq
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CONSTRUCT {
] .
} WHERE {
SELECT (COUNT(?p) AS ?entities) ?p {
#subjectFilter#
?s ?p ?o .
}
GROUP BY ?p
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/object-literals.rq
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ CONSTRUCT {
nde:distinctObjectsLiteral ?total .
} WHERE {
SELECT (COUNT(?o) as ?total) {
#subjectFilter#
?s ?p ?o .
FILTER(ISLITERAL(?o))
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/object-uri-space.rq
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ CONSTRUCT {
void:triples ?count .
} WHERE {
SELECT DISTINCT ?prefix (COUNT(?prefix) AS ?count) {
#subjectFilter#
?s ?p ?o .
FILTER(ISIRI(?o))
BIND(REPLACE(STR(?o), "([^/]+$)", "") AS ?prefix)
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/object-uris.rq
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ CONSTRUCT {
nde:distinctObjectsURI ?total .
} WHERE {
SELECT (COUNT(?o) as ?total) {
#subjectFilter#
?s ?p ?o .
FILTER(ISIRI(?o))
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/properties.rq
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CONSTRUCT {
void:properties ?count .
} WHERE {
SELECT (COUNT(DISTINCT ?p) as ?count) {
#subjectFilter#
?s ?p ?o
}
}
1 change: 1 addition & 0 deletions queries/analysis/subjects.rq
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CONSTRUCT {
void:distinctSubjects ?count .
} WHERE {
SELECT (COUNT(DISTINCT ?s) as ?count) {
#subjectFilter#
?s ?p ?o .
FILTER(!ISBLANK(?s))
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/triples.rq
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CONSTRUCT {
void:triples ?count .
} WHERE {
SELECT (COUNT(*) as ?count) {
#subjectFilter#
?s ?p ?o
}
}
14 changes: 14 additions & 0 deletions queries/selection/supplemental.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
@prefix nde: <https://data.netwerkdigitaalerfgoed.nl/def/> .

# Supplemental, hand-maintained metadata per dataset.
#
# Each nde:subjectFilter literal is a SPARQL triple pattern (written WITHOUT a trailing dot; the selector appends
# the '.') that is substituted for the `#subjectFilter#` placeholder in the analysis queries, restricting ?s to the
# subjects that belong to that dataset.
#
# The IRI inside the filter must be identical to the dataset IRI in the subject position of the same statement.
#
# NOTE(review): the patterns use the `schema:` prefix; confirm that every query containing `#subjectFilter#`
# declares that prefix.

<http://data.bibliotheken.nl/id/dataset/albac> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/albac>" .
<http://data.bibliotheken.nl/id/dataset/brinkman> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/brinkman>" .
<http://data.bibliotheken.nl/id/dataset/corps> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/corps>" .
<http://data.bibliotheken.nl/id/dataset/dbnla> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/dbnla>" .
<http://data.bibliotheken.nl/id/dataset/dbnlt> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/dbnlt>" .
<http://data.bibliotheken.nl/id/dataset/gtt> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/gtt>" .
<http://data.bibliotheken.nl/id/dataset/kbcode> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/kbcode>" .
<http://data.bibliotheken.nl/id/dataset/nbt> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/nbt>" .
<http://data.bibliotheken.nl/id/dataset/persons> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/persons>" .
<http://data.bibliotheken.nl/id/dataset/rise-alba> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/rise-alba>" .
<http://data.bibliotheken.nl/id/dataset/rise-centsprenten> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/rise-centsprenten>" .
<http://data.bibliotheken.nl/id/dataset/stcn> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/stcn>" .
27 changes: 15 additions & 12 deletions src/analyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,21 @@ export class SparqlQueryAnalyzer implements Analyzer {
type?: string
): Promise<AsyncIterator<Quad> & ResultStream<Quad>> {
try {
return await new QueryEngine().queryQuads(this.query, {
initialBindings: this.bindingsFactory.fromRecord({
dataset: this.dataFactory.namedNode(dataset.iri),
}) as unknown as Bindings,
sources: [
{
type: 'sparql',
value: endpoint,
},
],
httpTimeout: 300_000, // Some SPARQL queries really take this long.
});
return await new QueryEngine().queryQuads(
this.query.replace('#subjectFilter#', dataset.subjectFilter ?? ''),
{
initialBindings: this.bindingsFactory.fromRecord({
dataset: this.dataFactory.namedNode(dataset.iri),
}) as unknown as Bindings,
sources: [
{
type: 'sparql',
value: endpoint,
},
],
httpTimeout: 300_000, // Some SPARQL queries really take this long.
}
);
} catch (e) {
if (type !== undefined) {
// Retry without explicit SPARQL type, which is needed for endpoints that offer a SPARQL Service Description.
Expand Down
6 changes: 5 additions & 1 deletion src/dataset.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
export class Dataset {
constructor(
public readonly iri: string,
public distributions: Distribution[]
public distributions: Distribution[],

// Defined on the dataset rather than on the distribution, because a distribution may not have a URI and would then
// be impossible to reference from supplemental.ttl.
public subjectFilter?: string
) {}

public getSparqlDistribution(): Distribution | null {
Expand Down
28 changes: 26 additions & 2 deletions src/selector.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import {Dataset, Distribution} from './dataset.js';
import {QueryEngine} from '@comunica/query-sparql';
import {Quad} from 'n3';
import {DataFactory, Quad} from 'n3';
import {resolve} from 'node:path';
import rdfDereferencer from 'rdf-dereference';
import namedNode = DataFactory.namedNode;
import factory from 'rdf-ext';

export interface Selector {
select(): Promise<Set<Dataset>>;
Expand All @@ -15,6 +19,14 @@ export class SparqlQuerySelector implements Selector {
private readonly queryEngine: QueryEngine
) {}
async select(): Promise<Set<Dataset>> {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
const {data} = await rdfDereferencer.default.dereference(
resolve('queries/selection/supplemental.ttl'),
{localFiles: true}
);
const supplementalStore = await factory.dataset().import(data);

const quadStream = await this.queryEngine.queryQuads(this.config.query, {
sources: [
{
Expand All @@ -34,7 +46,19 @@ export class SparqlQuerySelector implements Selector {
quad.predicate.value &&
'http://www.w3.org/ns/dcat#Dataset' === quad.object.value
) {
dataset = new Dataset(quad.subject.value, []);
const subjectFilter = [
...supplementalStore.match(
quad.subject,
namedNode(
'https://data.netwerkdigitaalerfgoed.nl/def/subjectFilter'
)
),
][0]?.object.value;
dataset = new Dataset(
quad.subject.value,
[],
subjectFilter ? subjectFilter + '.' : undefined
);
datasets.add(dataset);
}

Expand Down