diff --git a/.env-template b/.env-template new file mode 100644 index 0000000..41e804a --- /dev/null +++ b/.env-template @@ -0,0 +1,29 @@ +## --------------------------------------- +## MAIN SITE SETTINGS +## --------------------------------------- +VITE_SIDENAV=true +VITE_ENABLED_PREZS=SpacePrez,VocPrez,CatPrez + +## --------------------------------------- +## MAP DISPLAY DEFAULT SETTINGS +## --------------------------------------- +VITE_MAP_SETTINGS_API_KEY= +VITE_MAP_SETTINGS_OPTIONS_CENTER_LAT=-28.8908 +VITE_MAP_SETTINGS_OPTIONS_CENTER_LNG=132.3757 +VITE_MAP_SETTINGS_OPTIONS_STREETVIEW_CONTROLLER=false +VITE_MAP_SETTINGS_OPTIONS_ZOOM=4 + +## --------------------------------------- +## CONSTANTS FOR THE MAP SEARCH QUERY +## --------------------------------------- +VITE_MAP_SEARCH_SPATIAL_DATASET_CLASS=http://www.w3.org/ns/dcat#Dataset +VITE_MAP_SEARCH_SPATIAL_MEMBERSHIP_RELATIONSHIP=http://www.w3.org/2000/01/rdf-schema#member +VITE_MAP_SEARCH_PROPS_F_ID=http://purl.org/dc/terms/identifier +VITE_MAP_SEARCH_PROPS_F_LABEL=http://www.w3.org/2000/01/rdf-schema#label +VITE_MAP_SEARCH_PROPS_FC_LABEL=https://schema.org/name +VITE_MAP_SEARCH_PROPS_DS_LABEL=https://schema.org/name + +## --------------------------------------- +## LOCAL DEV ENVIRONMENT +## --------------------------------------- +VITE_API_BASE_URL=http://localhost:8000 \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cf8ab8e --- /dev/null +++ b/.gitignore @@ -0,0 +1,190 @@ +vocpub.ttl + +# Created by https://www.toptal.com/developers/gitignore/api/python,venv +# Edit at https://www.toptal.com/developers/gitignore?templates=python,venv + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg 
+MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### venv ### +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +pip-selfcheck.json + +# End of https://www.toptal.com/developers/gitignore/api/python,venv \ No newline at end of file diff --git a/Taskfile.yml b/Taskfile.yml new file mode 100644 index 0000000..f86bb21 --- /dev/null +++ b/Taskfile.yml @@ -0,0 +1,85 @@ +# https://taskfile.dev + +version: "3" + +env: + FUSEKI_URL: http://localhost:3030 + FUSEKI_DATASET_URL: http://localhost:3030/bgs + FUSEKI_USERNAME: admin + FUSEKI_PASSWORD: admin + +tasks: + venv:create: + desc: Create Python virtual environment. + cmds: + - python -m venv .venv + + venv:install: + desc: Install Python dependencies to virtual environment. + cmds: + - .venv/bin/pip install -r requirements.txt + + stack:up: + desc: Start the Prez stack. 
+ cmds: + - docker-compose --profile stack up -d + + stack:down: + desc: Stop the Prez stack. + cmds: + - docker-compose --profile stack down + + stack:fuseki:up: + desc: Start the Fuseki service in the Prez stack. + cmds: + - docker-compose --profile db up -d + + stack:fuseki:down: + desc: Stop the Fuseki service in the Prez stack. + cmds: + - docker-compose --profile db down + + stack:prez:up: + desc: Start the Prez service in the Prez stack. + cmds: + - docker-compose --profile web up -d + + stack:prez:down: + desc: Stop the Prez service in the Prez stack. + cmds: + - docker-compose --profile web down + + stack:prez-ui:up: + desc: Start the Prez UI service in the Prez stack. + cmds: + - docker-compose --profile ui up -d + + stack:prez-ui:down: + desc: Stop the Prez UI service in the Prez stack. + cmds: + - docker-compose --profile ui down + + fuseki:dataset:list: + desc: List Fuseki datasets at $FUSEKI_URL. + cmds: + - .venv/bin/kurra fuseki dataset list $FUSEKI_URL -u $FUSEKI_USERNAME -p $FUSEKI_PASSWORD + + fuseki:dataset:create: + desc: Create Fuseki dataset. Example usage, task fuseki:dataset:create -- bgs + cmds: + - .venv/bin/kurra fuseki dataset create $FUSEKI_URL {{.CLI_ARGS}} -u $FUSEKI_USERNAME -p $FUSEKI_PASSWORD + + vocpub:download: + desc: Download VocPub SHACL shapes validator. + cmds: + - curl -fsSL -H "Accept: text/turtle" https://w3id.org/profile/vocpub/validator --output vocpub.ttl + + vocpub: + desc: Validate a file with the VocPub validator. Example usage, task vocpub -- vocab.ttl + cmds: + - .venv/bin/pyshacl -s vocpub.ttl {{.CLI_ARGS}} --allow-warning + + fuseki:upload: + desc: Upload files to BGS Fuseki dataset. 
+ cmds: + - .venv/bin/kurra fuseki upload vocabularies $FUSEKI_DATASET_URL -u $FUSEKI_USERNAME -p $FUSEKI_PASSWORD diff --git a/bgs-prefixes.ttl b/bgs-prefixes.ttl new file mode 100644 index 0000000..fc69e2d --- /dev/null +++ b/bgs-prefixes.ttl @@ -0,0 +1,6 @@ +PREFIX vann: +PREFIX ref: +PREFIX geo-thes: + +geo-thes: vann:preferredNamespacePrefix "geo-thes" . +ref: vann:preferredNamespacePrefix "ref" . \ No newline at end of file diff --git a/dictionaries.nt.errors.txt b/dictionaries.nt.errors.txt new file mode 100644 index 0000000..4b92ed5 --- /dev/null +++ b/dictionaries.nt.errors.txt @@ -0,0 +1,133 @@ +Failed to parse line 6870. > . + +Error: Invalid line: > . +Failed to parse line 6871. . + +Error: Invalid line: . +Failed to parse line 15491. > . + +Error: Invalid line: > . +Failed to parse line 15492. . + +Error: Invalid line: . +Failed to parse line 24112. > . + +Error: Invalid line: > . +Failed to parse line 24113. . + +Error: Invalid line: . +Failed to parse line 31160. " + +Error: Invalid line: " +Failed to parse line 31161. Primary Property Homes Ltd"@en . + +Error: Invalid line: Primary Property Homes Ltd"@en . +Failed to parse line 31900. "OLD SERIES COUNTY 6" FIELD SLIPS"@en . + +Error: Invalid line: FIELD SLIPS"@en . +Failed to parse line 32413. "Open-pit and underground + +Error: Invalid line: "Open-pit and underground +Failed to parse line 32414. "@en . + +Error: Invalid line: "@en . +Failed to parse line 32416. "Rail depot + +Error: Invalid line: "Rail depot +Failed to parse line 32417. "@en . + +Error: Invalid line: "@en . +Failed to parse line 32418. "Underground workings + +Error: Invalid line: "Underground workings +Failed to parse line 32419. "@en . + +Error: Invalid line: "@en . +Failed to parse line 32420. "Wharf + +Error: Invalid line: "Wharf +Failed to parse line 32421. "@en . + +Error: Invalid line: "@en . +Failed to parse line 32738. > ">"@en . + +Error: Invalid line: > ">"@en . +Failed to parse line 32739. "<"@en . 
+ +Error: Invalid line: "<"@en . +Failed to parse line 39786. " + +Error: Invalid line: " +Failed to parse line 39787. Primary Property Homes Ltd"@en . + +Error: Invalid line: Primary Property Homes Ltd"@en . +Failed to parse line 40526. "OLD SERIES COUNTY 6" FIELD SLIPS"@en . + +Error: Invalid line: FIELD SLIPS"@en . +Failed to parse line 41039. "Open-pit and underground + +Error: Invalid line: "Open-pit and underground +Failed to parse line 41040. "@en . + +Error: Invalid line: "@en . +Failed to parse line 41042. "Rail depot + +Error: Invalid line: "Rail depot +Failed to parse line 41043. "@en . + +Error: Invalid line: "@en . +Failed to parse line 41044. "Underground workings + +Error: Invalid line: "Underground workings +Failed to parse line 41045. "@en . + +Error: Invalid line: "@en . +Failed to parse line 41046. "Wharf + +Error: Invalid line: "Wharf +Failed to parse line 41047. "@en . + +Error: Invalid line: "@en . +Failed to parse line 41364. > ">"@en . + +Error: Invalid line: > ">"@en . +Failed to parse line 41365. "<"@en . + +Error: Invalid line: "<"@en . +Failed to parse line 48412. " + +Error: Invalid line: " +Failed to parse line 48413. Primary Property Homes Ltd"@en . + +Error: Invalid line: Primary Property Homes Ltd"@en . +Failed to parse line 49152. "OLD SERIES COUNTY 6" FIELD SLIPS"@en . + +Error: Invalid line: FIELD SLIPS"@en . +Failed to parse line 49665. "Working is partly surface and partly underground, working the same bodies for the same commodities + +Error: Invalid line: "Working is partly surface and partly underground, working the same bodies for the same commodities +Failed to parse line 49666. "@en . + +Error: Invalid line: "@en . +Failed to parse line 49670. "Sea, river or canal wharf where mineral commodities are unloaded and stored + +Error: Invalid line: "Sea, river or canal wharf where mineral commodities are unloaded and stored +Failed to parse line 49671. "@en . + +Error: Invalid line: "@en . +Failed to parse line 49988. 
> "Greater than"@en . + +Error: Invalid line: > "Greater than"@en . +Failed to parse line 49989. "Less than"@en . + +Error: Invalid line: "Less than"@en . +Failed to parse line 52027. "Dictionary of terms classifying the relationship between units separated by a "unit boundary"';"@en . + +Error: Invalid line: unit boundary"';"@en . +Failed to parse line 52030. "Dictionary of the levels of certainty pertaining to an observed or interpreted geological feature shown on a geological map. GSD Notes: Dictionary very minimalist at present to meet immediate need. "Inferred" could be expanded later to specify various types or methods of inference"@en . + +Error: Invalid line: Inferred" could be expanded later to specify various types or methods of inference"@en . +Failed to parse line 52063. ""Dictionary of business rule categories for BGS databases. This dictionary is used in the Technical Metadata system to indicate the a rule category (s) of the businnes rules defined against database objects (designs) predominantly within the oracle system."@en . + +Error: Invalid line: Dictionary of business rule categories for BGS databases. This dictionary is used in the Technical Metadata system to indicate the a rule category (s) of the businnes rules defined against database objects (designs) predominantly within the oracle system."@en . 
+Total errors: 44 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..7050d38 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,52 @@ +services: + fuseki: + image: ghcr.io/kurrawong/fuseki-geosparql:git-main-e642d849 + environment: + - JAVA_OPTS=-Xms512m -Xmx2048m + ports: + - 3030:3030 + volumes: + - fuseki-data:/fuseki + profiles: + - db + - stack + + prez: + image: ghcr.io/rdflib/prez:3.3.0 + ports: + - 8000:8000 + volumes: + - ./bgs-prefixes.ttl:/app/prez/reference_data/prefixes/bgs-prefixes.ttl + environment: + - SPARQL_ENDPOINT=http://fuseki:3030/bgs + depends_on: + - fuseki + healthcheck: + test: ["CMD", "curl", "-f", "http://fuseki:3030"] + interval: 5s + timeout: 10s + retries: 3 + start_period: 20s + profiles: + - web + - stack + + prez-ui: + image: ghcr.io/rdflib/prez-ui:3.2.3 + ports: + - 8001:80 + env_file: .env + depends_on: + - prez + healthcheck: + test: ["CMD", "curl", "-f", "http://prez:8000"] + interval: 5s + timeout: 10s + retries: 3 + start_period: 30s + profiles: + - ui + - stack + +volumes: + fuseki-data: diff --git a/earth_material_class.py b/earth_material_class.py new file mode 100644 index 0000000..a6a645e --- /dev/null +++ b/earth_material_class.py @@ -0,0 +1,77 @@ +""" +Earth material class +""" + +import time +from textwrap import dedent + +from rdflib import Graph, URIRef, Literal +from rdflib.namespace import RDF, SKOS + + +concept_scheme_data = """ +PREFIX dcterms: +PREFIX rdfs: +PREFIX skos: +PREFIX xsd: + + + a skos:ConceptScheme ; + dcterms:accessRights ; + dcterms:created "2023-07-28"^^xsd:date ; + dcterms:modified "2023-07-28"^^xsd:date ; + dcterms:creator ; + skos:definition "The BGS Earth Material Class vocabulary."@en ; + dcterms:identifier "http://data.bgs.ac.uk/ref/EarthMaterialClass"^^xsd:anyURI ; + dcterms:publisher ; + skos:prefLabel "Earth Material Class"@en ; + skos:historyNote "BGS GitHub repository"@en ; +. 
def main() -> None:
    """Build the Earth Material Class vocabulary Turtle file.

    Steps:

    1. Parse ``vocabularies/earth-material-class.nt`` together with the
       concept scheme metadata held in the module-level
       ``concept_scheme_data`` string.
    2. Attach every ``skos:Concept`` to the concept scheme and give it a
       history note.
    3. Register each concept that has no ``skos:broader`` statement and is
       not the object of any ``skos:narrower`` statement as a top concept
       of the scheme.
    4. Serialize the graph to ``vocabularies/earth-material-class.ttl``.

    The elapsed time is printed whether or not a step raised.
    """
    starttime = time.time()

    try:
        graph = Graph()
        # State both syntaxes explicitly instead of relying on extension
        # guessing / rdflib's default for in-memory data.
        graph.parse("vocabularies/earth-material-class.nt", format="ntriples")
        graph.parse(data=concept_scheme_data, format="turtle")

        concept_scheme = URIRef("http://data.bgs.ac.uk/ref/EarthMaterialClass")
        # Snapshot the subjects first: the loop below adds triples to the
        # same graph that is being queried.
        concepts = list(graph.subjects(RDF.type, SKOS.Concept))

        for concept in concepts:
            graph.add((concept, SKOS.inScheme, concept_scheme))
            graph.add(
                (
                    concept,
                    SKOS.historyNote,
                    Literal("From the BGS vocabularies GitHub repository."),
                )
            )

        # Concepts without any parent, expressed in either direction.
        query = dedent(
            """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?concept
            WHERE {
                ?concept a skos:Concept .
                FILTER NOT EXISTS {
                    ?concept skos:broader ?parent_concept .
                }
                FILTER NOT EXISTS {
                    ?parent_concept skos:narrower ?concept .
                }
            }
            """
        ).strip()

        result = graph.query(query)

        for row in result:
            graph.add((concept_scheme, SKOS.hasTopConcept, row["concept"]))

        graph.serialize("vocabularies/earth-material-class.ttl", format="longturtle")

    finally:
        # Runs on success and on failure alike.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
def main() -> None:
    """Build the Geochronology vocabulary Turtle file.

    Steps:

    1. Parse ``vocabularies/geochronology.nt`` together with the concept
       scheme metadata held in the module-level ``concept_scheme_data``
       string.
    2. Attach every ``skos:Concept`` to the concept scheme and give it a
       history note.
    3. Register each concept that has no ``skos:broader`` statement and is
       not the object of any ``skos:narrower`` statement as a top concept
       of the scheme.
    4. Serialize the graph to ``vocabularies/geochronology.ttl``.

    The elapsed time is printed whether or not a step raised.
    """
    starttime = time.time()

    try:
        graph = Graph()
        # State both syntaxes explicitly instead of relying on extension
        # guessing / rdflib's default for in-memory data.
        graph.parse("vocabularies/geochronology.nt", format="ntriples")
        graph.parse(data=concept_scheme_data, format="turtle")

        concept_scheme = URIRef("http://data.bgs.ac.uk/ref/Geochronology")
        # Snapshot the subjects first: the loop below adds triples to the
        # same graph that is being queried.
        concepts = list(graph.subjects(RDF.type, SKOS.Concept))

        for concept in concepts:
            graph.add((concept, SKOS.inScheme, concept_scheme))
            graph.add(
                (
                    concept,
                    SKOS.historyNote,
                    Literal("From the BGS vocabularies GitHub repository."),
                )
            )

        # Concepts without any parent, expressed in either direction.
        query = dedent(
            """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?concept
            WHERE {
                ?concept a skos:Concept .
                FILTER NOT EXISTS {
                    ?concept skos:broader ?parent_concept .
                }
                FILTER NOT EXISTS {
                    ?parent_concept skos:narrower ?concept .
                }
            }
            """
        ).strip()

        result = graph.query(query)

        for row in result:
            graph.add((concept_scheme, SKOS.hasTopConcept, row["concept"]))

        graph.serialize("vocabularies/geochronology.ttl", format="longturtle")

    finally:
        # Runs on success and on failure alike.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
def main() -> None:
    """Build the Lexicon Named Rock Unit vocabulary Turtle file.

    Steps:

    1. Parse ``vocabularies/lexicon-named-rock-unit.nt`` together with the
       concept scheme metadata held in the module-level
       ``concept_scheme_data`` string.
    2. Attach every ``skos:Concept`` to the concept scheme and give it a
       history note.
    3. Register each concept that has no ``skos:broader`` statement and is
       not the object of any ``skos:narrower`` statement as a top concept
       of the scheme.
    4. Serialize the graph to ``vocabularies/lexicon-named-rock-unit.ttl``.

    The elapsed time is printed whether or not a step raised.
    """
    starttime = time.time()

    try:
        graph = Graph()
        # State both syntaxes explicitly instead of relying on extension
        # guessing / rdflib's default for in-memory data.
        graph.parse("vocabularies/lexicon-named-rock-unit.nt", format="ntriples")
        graph.parse(data=concept_scheme_data, format="turtle")

        concept_scheme = URIRef("http://data.bgs.ac.uk/ref/Lexicon")
        # Snapshot the subjects first: the loop below adds triples to the
        # same graph that is being queried.
        concepts = list(graph.subjects(RDF.type, SKOS.Concept))

        for concept in concepts:
            graph.add((concept, SKOS.inScheme, concept_scheme))
            graph.add(
                (
                    concept,
                    SKOS.historyNote,
                    Literal("From the BGS vocabularies GitHub repository."),
                )
            )

        # Concepts without any parent, expressed in either direction.
        query = dedent(
            """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?concept
            WHERE {
                ?concept a skos:Concept .
                FILTER NOT EXISTS {
                    ?concept skos:broader ?parent_concept .
                }
                FILTER NOT EXISTS {
                    ?parent_concept skos:narrower ?concept .
                }
            }
            """
        ).strip()

        result = graph.query(query)

        for row in result:
            graph.add((concept_scheme, SKOS.hasTopConcept, row["concept"]))

        graph.serialize("vocabularies/lexicon-named-rock-unit.ttl", format="longturtle")

    finally:
        # Runs on success and on failure alike.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
def main() -> None:
    """Add the triples the thesaurus needs to pass the VocPub validator.

    Parses ``vocabularies/thesaurus_metadata.ttl`` and
    ``vocabularies/thesaurus.nt``, then:

    * moves the ``skos:hasTopConcept`` statement from the trailing-slash
      scheme IRI to the concept scheme found in the parsed data;
    * re-points every concept's ``skos:inScheme`` at that scheme;
    * copies ``skos:prefLabel`` into ``skos:definition`` for concepts
      without a definition;
    * hangs every concept that has no parent under the top concept.

    The result is written to ``vocabularies/thesaurus.ttl``.  The elapsed
    time is printed whether or not a step raised.

    Raises:
        ValueError: if no ``skos:ConceptScheme`` is present in the parsed
            data.
    """
    starttime = time.time()

    try:
        graph = Graph()
        graph.parse("vocabularies/thesaurus_metadata.ttl")
        graph.parse("vocabularies/thesaurus.nt", format="ntriples")

        # Materialize the subjects before the loop: the graph is mutated
        # while the concepts are processed, and a lazy generator over the
        # store must not be consumed during those mutations.
        concepts = list(graph.subjects(RDF.type, SKOS.Concept))
        concept_scheme = graph.value(None, RDF.type, SKOS.ConceptScheme)
        if concept_scheme is None:
            raise ValueError(
                "No skos:ConceptScheme found in the parsed thesaurus data."
            )
        top_concept = URIRef("http://data.bgs.ac.uk/id/GeoscienceThesaurus/Concept/0")

        # Fix top concept pointing to concept scheme with trailing slash.
        graph.remove(
            (
                URIRef("http://data.bgs.ac.uk/ref/GeoscienceThesaurus/"),
                SKOS.hasTopConcept,
                top_concept,
            )
        )
        graph.add((concept_scheme, SKOS.hasTopConcept, top_concept))

        for concept in concepts:
            graph.remove((concept, SKOS.inScheme, None))
            graph.add((concept, SKOS.inScheme, concept_scheme))

            if graph.value(concept, SKOS.definition, None) is None:
                label = graph.value(concept, SKOS.prefLabel, None)
                # Without a label there is nothing to fall back on; adding
                # ``None`` as an object would make rdflib raise.
                if label is not None:
                    graph.add((concept, SKOS.definition, label))

            # The top concept is parentless by design; never link it to
            # itself.
            if concept == top_concept:
                continue

            has_parent = (
                graph.value(concept, SKOS.broader, None) is not None
                or graph.value(None, SKOS.narrower, concept) is not None
            )
            if not has_parent:
                graph.add((top_concept, SKOS.narrower, concept))
                graph.add((concept, SKOS.broader, top_concept))

        graph.serialize("vocabularies/thesaurus.ttl", format="longturtle")

    finally:
        # Runs on success and on failure alike.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
"""Parse ``dictionaries.nt`` one line at a time and report the lines that fail.

Every line that parses as N-Triples is accumulated into a single graph,
which is written to ``dictionaries.ttl``.  Lines that do not parse are
collected and printed at the end, followed by the total error count.
"""
from rdflib import Graph

graph = Graph()

errors = []

with open("dictionaries.nt", "r", encoding="utf-8") as file:
    # Iterate the file lazily instead of loading every line up front;
    # line numbers are 1-based to match what an editor shows.
    for lineno, line in enumerate(file, start=1):
        try:
            graph.parse(data=line, format="ntriples")
        except Exception as err:
            # Deliberately broad: any failure on a single line is recorded
            # and the remaining lines are still processed.
            errors.append(f"Failed to parse line {lineno}. {line}\nError: {err}")

graph.serialize("dictionaries.ttl", format="longturtle")

if errors:
    for error in errors:
        print(error)

    print(f"Total errors: {len(errors)}")