Skip to content

Commit

Permalink
Merge branch 'master' into prefer_sparql
Browse files Browse the repository at this point in the history
  • Loading branch information
jesper-friis authored Oct 13, 2024
2 parents eb1934c + 455cd08 commit faa0777
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 35 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,14 @@ jobs:
# https://pyup.io/vulnerabilities/CVE-2021-34141/44717/
# 70612: Jinja2 vulnerability. Only used as subdependency for mkdocs++ in tripper.
# https://data.safetycli.com/v/70612/97c/
# https://data.safetycli.com/v/72715/97c/ # update to mkdocs-material>=9.5.32 (presumably mkdocs-material; mkdocs itself has no 9.x release - confirm)
safety_options: |
--ignore=48547
--ignore=44715
--ignore=44716
--ignore=44717
--ignore=70612
--ignore=72715
## Build package
run_build_package: true
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ci:
# hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: end-of-file-fixer
exclude: ^.*\.(svg)$
Expand All @@ -41,13 +41,13 @@ repos:
- --skip-gitignore

- repo: https://github.com/psf/black
rev: 24.8.0
rev: 24.10.0
hooks:
- id: black
args: [--line-length=79]

- repo: https://github.com/PyCQA/bandit
rev: 1.7.9
rev: 1.7.10
hooks:
- id: bandit
args: ["-r"]
Expand Down
22 changes: 22 additions & 0 deletions tests/test_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,28 @@ def test_repr() -> None:
)


def test_n3() -> None:
    """Check the n3 serialisation produced by `Literal.n3()`.

    Covers a typed numeric literal, a plain string, strings holding
    double quotes and backslashes, and JSON-typed literals.
    """
    from tripper import RDF, Literal

    def n3(*args, **kwargs) -> str:
        """Build a Literal from the arguments and return its n3 form."""
        return Literal(*args, **kwargs).n3()

    # Python `float` datatype is serialised as xsd:double
    assert n3(42, datatype=float) == (
        '"42"^^<http://www.w3.org/2001/XMLSchema#double>'
    )

    # Plain string: only wrapped in double quotes
    assert n3("a string") == '"a string"'

    # Embedded double quotes are backslash-escaped
    assert n3('a string with "embedded" quotes') == (
        r'"a string with \"embedded\" quotes"'
    )

    # Existing backslashes are escaped as well as the quotes
    assert n3(r"a string with \"escaped\" quotes") == (
        r'"a string with \\\"escaped\\\" quotes"'
    )

    # JSON-typed literals get the same escaping plus the datatype suffix
    assert n3('"json string"', datatype=RDF.JSON) == (
        r'"\"json string\""^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#JSON>'
    )
    assert n3('{"a": 1}', datatype=RDF.JSON) == (
        r'"{\"a\": 1}"^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#JSON>'
    )


def test_split_iri() -> None:
"""Test parse n3-encoded literal value."""
from tripper import DCTERMS, RDFS
Expand Down
82 changes: 82 additions & 0 deletions tests/test_triplestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,30 @@ def test_triplestore( # pylint: disable=too-many-locals
pytest.importorskip("rdflib")
pytest.importorskip("dlite")
pytest.importorskip("SPARQLWrapper")
from tripper.errors import NamespaceError
from tripper.triplestore import DCTERMS, OWL, RDF, RDFS, XSD, Triplestore

ts = Triplestore(backend)
assert ts.expand_iri("xsd:integer") == XSD.integer
assert ts.prefix_iri(RDF.type) == "rdf:type"

with pytest.raises(NamespaceError):
ts.expand_iri(":MyConcept")

assert ts.prefix_iri("http://example.com#MyConcept") == (
"http://example.com#MyConcept"
)
with pytest.raises(NamespaceError):
ts.prefix_iri("http://example.com#MyConcept", require_prefixed=True)

EX = ts.bind(
"ex", "http://example.com/onto#"
) # pylint: disable=invalid-name
BASE = ts.bind("", "http://example.com#") # pylint: disable=invalid-name

assert ts.expand_iri(":MyConcept") == BASE.MyConcept
assert ts.prefix_iri(BASE.MyConcept) == ":MyConcept"

assert str(EX) == "http://example.com/onto#"
ts.add_mapsTo(
EX.MyConcept, "http://onto-ns.com/meta/0.1/MyEntity", "myprop"
Expand Down Expand Up @@ -497,3 +513,69 @@ def test_bind_errors():
ts2.bind("ex")
with pytest.raises(TypeError):
ts2.bind("ex", Ellipsis)


def test_value():
    """Check `Triplestore.value()`.

    Covers lookup of the single missing term, filtering on `lang`, the
    three modes of `any` (False, True and None) and the `default`
    fallback.
    """
    pytest.importorskip("rdflib")

    from tripper import DCTERMS, RDF, RDFS, Literal, Triplestore
    from tripper.errors import UniquenessError

    ts = Triplestore(backend="rdflib")
    EX = ts.bind("ex", "http://example.com#")

    # One comment without language, two English ones and one Danish
    plain = Literal("First comment...")
    second_en = Literal("Second comment...", lang="en")
    second_da = Literal("Anden kommentar...", lang="da")
    third_en = Literal("Third comment...", lang="en")

    triples = [
        (EX.mydata, RDF.type, EX.Dataset),
        (EX.mydata, DCTERMS.title, Literal("My little data")),
    ]
    triples.extend(
        (EX.mydata, RDFS.comment, comment)
        for comment in (plain, second_en, second_da, third_en)
    )
    ts.add_triples(triples)

    def comment_value(**kwargs):
        """Return `ts.value()` for the rdfs:comment of EX.mydata."""
        return ts.value(subject=EX.mydata, predicate=RDFS.comment, **kwargs)

    # Whichever of subject/object is left out is the term returned
    found_type = ts.value(subject=EX.mydata, predicate=RDF.type)
    assert found_type == EX.Dataset
    found_subject = ts.value(predicate=RDF.type, object=EX.Dataset)
    assert found_subject == EX.mydata
    found_title = ts.value(subject=EX.mydata, predicate=DCTERMS.title)
    assert found_title == Literal("My little data")

    # Two English comments and default `any` -> not unique
    with pytest.raises(UniquenessError):
        comment_value(lang="en")

    # Only one Danish comment
    assert comment_value(lang="da") == second_da

    # any=True: one of the matching values is returned
    assert comment_value(lang="en", any=True) in (second_en, third_en)

    # any=None: all matching values are returned as an iterable
    assert set(comment_value(lang="en", any=None)) == {second_en, third_en}

    # No match: None unless a default is given
    assert comment_value(lang="no") is None
    assert comment_value(lang="no", default="a") == "a"
9 changes: 6 additions & 3 deletions tripper/convert/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,11 @@ def get_obj_iri(obj, uuid):
if isinstance(obj, Mapping):
if not obj:
return OTEIO.Dictionary
obj_iri = f"_:{dict}_{uuid}"
obj_iri = f"_:dict_{uuid}"
elif isinstance(obj, Sequence) and not isinstance(obj, str):
if not obj:
return RDF.List
obj_iri = f"_:{list}_{uuid}"
obj_iri = f"_:list_{uuid}"
elif obj is None:
return OWL.Nothing
else:
Expand Down Expand Up @@ -286,7 +286,10 @@ def get_obj(value):
container[str(key)] = get_obj(value)
elif pred in recognised_iris:
container[recognised_iris[pred]] = get_obj(obj)
elif not ignore_unrecognised and pred not in (RDF.type,):
elif not ignore_unrecognised and pred not in (
RDF.type,
RDFS.subClassOf,
):
raise ValueError(
f"Unrecognised predicate '{pred}' in dict: {iri}"
)
Expand Down
9 changes: 6 additions & 3 deletions tripper/literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,11 @@ def to_python(self):

def n3(self) -> str: # pylint: disable=invalid-name
"""Returns a representation in n3 format."""

form = self.replace("\\", r"\\").replace('"', r"\"")

if self.lang:
return f'"{self}"@{self.lang}'
return f'"{form}"@{self.lang}'
if self.datatype:
return f'"{self}"^^<{self.datatype}>'
return f'"{self}"'
return f'"{form}"^^<{self.datatype}>'
return f'"{form}"'
98 changes: 72 additions & 26 deletions tripper/triplestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,14 @@


# Regular expression matching a prefixed IRI
_MATCH_PREFIXED_IRI = re.compile(r"^([a-z][a-z0-9]*):([^/]{1}.*)$")
_MATCH_PREFIXED_IRI = re.compile(r"^([a-z][a-z0-9]*)?:([^/]{1}.*)$")


class Triplestore:
"""Provides a common frontend to a range of triplestore backends."""

# pylint: disable=too-many-instance-attributes

default_namespaces = {
"xml": XML,
"rdf": RDF,
Expand Down Expand Up @@ -165,6 +167,9 @@ def __init__(
self.namespaces: "Dict[str, Namespace]" = {}
self.closed = False
self.backend_name = backend_name
self.database = database
self.package = package
self.kwargs = kwargs.copy()
self.backend = cls(base_iri=base_iri, database=database, **kwargs)

# Cache functions in the triplestore for fast access
Expand Down Expand Up @@ -588,8 +593,11 @@ def value( # pylint: disable=redefined-builtin
predicate: Possible criteria to match.
object: Possible criteria to match.
default: Value to return if no matches are found.
any: If true, return any matching value, otherwise raise
UniquenessError.
any: Used to define how many values to return. Can be set to:
`False` (default): return the value or raise UniquenessError
if there is more than one matching value.
`True`: return any matching value if there is more than one.
`None`: return a generator over all matching values.
lang: If provided, require that the value must be a localised
literal with the given language code.
Expand All @@ -608,33 +616,30 @@ def value( # pylint: disable=redefined-builtin
(idx,) = [i for i, v in enumerate(spo) if v is None]

triples = self.triples(subject, predicate, object)

if lang:
first = None
if idx != 2:
raise ValueError("`object` must be None if `lang` is given")
for triple in triples:
value = triple[idx]
if isinstance(value, Literal) and value.lang == lang:
if any:
return value
if first:
raise UniquenessError("More than one match")
first = value
if first is None:
return default
else:
try:
triple = next(triples)
except StopIteration:
return default
triples = (
t
for t in triples
if isinstance(t[idx], Literal)
and t[idx].lang == lang # type: ignore
)

if any is None:
return (t[idx] for t in triples) # type: ignore

try:
value = next(triples)[idx]
except StopIteration:
return default

try:
next(triples)
except StopIteration:
return triple[idx]
return value

if any:
return triple[idx]
if any is True:
return value
raise UniquenessError("More than one match")

def subjects(
Expand Down Expand Up @@ -704,22 +709,63 @@ def has(
# ------------------------------------------
def expand_iri(self, iri: str):
"""Return the full IRI if `iri` is prefixed. Otherwise `iri` is
returned."""
returned.
Examples:
>>> from tripper import Triplestore
>>> ts = Triplestore(backend="rdflib")
# Unknown prefix raises an exception
>>> ts.expand_iri("ex:Concept") # doctest: +ELLIPSIS
Traceback (most recent call last):
...
tripper.errors.NamespaceError: unknown namespace: 'ex'
>>> EX = ts.bind("ex", "http://example.com#")
>>> ts.expand_iri("ex:Concept")
'http://example.com#Concept'
# Returns `iri` if it has no prefix
>>> ts.expand_iri("http://example.com#Concept")
'http://example.com#Concept'
"""
match = re.match(_MATCH_PREFIXED_IRI, iri)
if match:
prefix, name = match.groups()
if prefix is None:
prefix = ""
if prefix not in self.namespaces:
raise NamespaceError(f"unknown namespace: {prefix}")
raise NamespaceError(f"unknown namespace: '{prefix}'")
return f"{self.namespaces[prefix]}{name}"
return iri

def prefix_iri(self, iri: str, require_prefixed: bool = False):
# pylint: disable=line-too-long
"""Return prefixed IRI.
This is the reverse of expand_iri().
If `require_prefixed` is true, a NamespaceError exception is raised
if no prefix can be found.
Examples:
>>> from tripper import Triplestore
>>> ts = Triplestore(backend="rdflib")
>>> ts.prefix_iri("http://example.com#Concept")
'http://example.com#Concept'
>>> ts.prefix_iri(
... "http://example.com#Concept", require_prefixed=True
... ) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
tripper.errors.NamespaceError: No prefix defined for IRI: http://example.com#Concept
>>> EX = ts.bind("ex", "http://example.com#")
>>> ts.prefix_iri("http://example.com#Concept")
'ex:Concept'
"""
if not re.match(_MATCH_PREFIXED_IRI, iri):
for prefix, namespace in self.namespaces.items():
Expand Down

0 comments on commit faa0777

Please sign in to comment.