diff --git a/.env-template b/.env-template
new file mode 100644
index 0000000..41e804a
--- /dev/null
+++ b/.env-template
@@ -0,0 +1,29 @@
+## ---------------------------------------
+## MAIN SITE SETTINGS
+## ---------------------------------------
+VITE_SIDENAV=true
+VITE_ENABLED_PREZS=SpacePrez,VocPrez,CatPrez
+
+## ---------------------------------------
+## MAP DISPLAY DEFAULT SETTINGS
+## ---------------------------------------
+VITE_MAP_SETTINGS_API_KEY=
+VITE_MAP_SETTINGS_OPTIONS_CENTER_LAT=-28.8908
+VITE_MAP_SETTINGS_OPTIONS_CENTER_LNG=132.3757
+VITE_MAP_SETTINGS_OPTIONS_STREETVIEW_CONTROLLER=false
+VITE_MAP_SETTINGS_OPTIONS_ZOOM=4
+
+## ---------------------------------------
+## CONSTANTS FOR THE MAP SEARCH QUERY
+## ---------------------------------------
+VITE_MAP_SEARCH_SPATIAL_DATASET_CLASS=http://www.w3.org/ns/dcat#Dataset
+VITE_MAP_SEARCH_SPATIAL_MEMBERSHIP_RELATIONSHIP=http://www.w3.org/2000/01/rdf-schema#member
+VITE_MAP_SEARCH_PROPS_F_ID=http://purl.org/dc/terms/identifier
+VITE_MAP_SEARCH_PROPS_F_LABEL=http://www.w3.org/2000/01/rdf-schema#label
+VITE_MAP_SEARCH_PROPS_FC_LABEL=https://schema.org/name
+VITE_MAP_SEARCH_PROPS_DS_LABEL=https://schema.org/name
+
+## ---------------------------------------
+## LOCAL DEV ENVIRONMENT
+## ---------------------------------------
+VITE_API_BASE_URL=http://localhost:8000
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cf8ab8e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,190 @@
+vocpub.ttl
+
+# Created by https://www.toptal.com/developers/gitignore/api/python,venv
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,venv
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+### venv ###
+# Virtualenv
+# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
+[Bb]in
+[Ii]nclude
+[Ll]ib
+[Ll]ib64
+[Ll]ocal
+[Ss]cripts
+pyvenv.cfg
+pip-selfcheck.json
+
+# End of https://www.toptal.com/developers/gitignore/api/python,venv
\ No newline at end of file
diff --git a/Taskfile.yml b/Taskfile.yml
new file mode 100644
index 0000000..f86bb21
--- /dev/null
+++ b/Taskfile.yml
@@ -0,0 +1,85 @@
+# https://taskfile.dev
+
+version: "3"
+
+env:
+ FUSEKI_URL: http://localhost:3030
+ FUSEKI_DATASET_URL: http://localhost:3030/bgs
+ FUSEKI_USERNAME: admin
+ FUSEKI_PASSWORD: admin
+
+tasks:
+ venv:create:
+ desc: Create Python virtual environment.
+ cmds:
+ - python -m venv .venv
+
+ venv:install:
+ desc: Install Python dependencies to virtual environment.
+ cmds:
+ - .venv/bin/pip install -r requirements.txt
+
+ stack:up:
+ desc: Start the Prez stack.
+ cmds:
+ - docker-compose --profile stack up -d
+
+ stack:down:
+ desc: Stop the Prez stack.
+ cmds:
+ - docker-compose --profile stack down
+
+ stack:fuseki:up:
+ desc: Start the Fuseki service in the Prez stack.
+ cmds:
+ - docker-compose --profile db up -d
+
+ stack:fuseki:down:
+ desc: Stop the Fuseki service in the Prez stack.
+ cmds:
+ - docker-compose --profile db down
+
+ stack:prez:up:
+ desc: Start the Prez service in the Prez stack.
+ cmds:
+ - docker-compose --profile web up -d
+
+ stack:prez:down:
+ desc: Stop the Prez service in the Prez stack.
+ cmds:
+ - docker-compose --profile web down
+
+ stack:prez-ui:up:
+ desc: Start the Prez UI service in the Prez stack.
+ cmds:
+ - docker-compose --profile ui up -d
+
+ stack:prez-ui:down:
+ desc: Stop the Prez UI service in the Prez stack.
+ cmds:
+ - docker-compose --profile ui down
+
+ fuseki:dataset:list:
+ desc: List Fuseki datasets at $FUSEKI_URL.
+ cmds:
+ - .venv/bin/kurra fuseki dataset list $FUSEKI_URL -u $FUSEKI_USERNAME -p $FUSEKI_PASSWORD
+
+ fuseki:dataset:create:
+ desc: Create Fuseki dataset. Example usage, task fuseki:dataset:create -- bgs
+ cmds:
+ - .venv/bin/kurra fuseki dataset create $FUSEKI_URL {{.CLI_ARGS}} -u $FUSEKI_USERNAME -p $FUSEKI_PASSWORD
+
+ vocpub:download:
+ desc: Download VocPub SHACL shapes validator.
+ cmds:
+ - curl -fsSL -H "Accept: text/turtle" https://w3id.org/profile/vocpub/validator --output vocpub.ttl
+
+ vocpub:
+ desc: Validate a file with the VocPub validator. Example usage, task vocpub -- vocab.ttl
+ cmds:
+ - .venv/bin/pyshacl -s vocpub.ttl {{.CLI_ARGS}} --allow-warning
+
+ fuseki:upload:
+ desc: Upload files to BGS Fuseki dataset.
+ cmds:
+ - .venv/bin/kurra fuseki upload vocabularies $FUSEKI_DATASET_URL -u $FUSEKI_USERNAME -p $FUSEKI_PASSWORD
diff --git a/bgs-prefixes.ttl b/bgs-prefixes.ttl
new file mode 100644
index 0000000..fc69e2d
--- /dev/null
+++ b/bgs-prefixes.ttl
@@ -0,0 +1,6 @@
+PREFIX vann: <http://purl.org/vocab/vann/>
+PREFIX ref: <http://data.bgs.ac.uk/ref/>
+PREFIX geo-thes: <http://data.bgs.ac.uk/ref/GeoscienceThesaurus/>
+
+geo-thes: vann:preferredNamespacePrefix "geo-thes" .
+ref: vann:preferredNamespacePrefix "ref" .
\ No newline at end of file
diff --git a/dictionaries.nt.errors.txt b/dictionaries.nt.errors.txt
new file mode 100644
index 0000000..4b92ed5
--- /dev/null
+++ b/dictionaries.nt.errors.txt
@@ -0,0 +1,133 @@
+Failed to parse line 6870. > .
+
+Error: Invalid line: > .
+Failed to parse line 6871. .
+
+Error: Invalid line: .
+Failed to parse line 15491. > .
+
+Error: Invalid line: > .
+Failed to parse line 15492. .
+
+Error: Invalid line: .
+Failed to parse line 24112. > .
+
+Error: Invalid line: > .
+Failed to parse line 24113. .
+
+Error: Invalid line: .
+Failed to parse line 31160. "
+
+Error: Invalid line: "
+Failed to parse line 31161. Primary Property Homes Ltd"@en .
+
+Error: Invalid line: Primary Property Homes Ltd"@en .
+Failed to parse line 31900. "OLD SERIES COUNTY 6" FIELD SLIPS"@en .
+
+Error: Invalid line: FIELD SLIPS"@en .
+Failed to parse line 32413. "Open-pit and underground
+
+Error: Invalid line: "Open-pit and underground
+Failed to parse line 32414. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 32416. "Rail depot
+
+Error: Invalid line: "Rail depot
+Failed to parse line 32417. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 32418. "Underground workings
+
+Error: Invalid line: "Underground workings
+Failed to parse line 32419. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 32420. "Wharf
+
+Error: Invalid line: "Wharf
+Failed to parse line 32421. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 32738. > ">"@en .
+
+Error: Invalid line: > ">"@en .
+Failed to parse line 32739. "<"@en .
+
+Error: Invalid line: "<"@en .
+Failed to parse line 39786. "
+
+Error: Invalid line: "
+Failed to parse line 39787. Primary Property Homes Ltd"@en .
+
+Error: Invalid line: Primary Property Homes Ltd"@en .
+Failed to parse line 40526. "OLD SERIES COUNTY 6" FIELD SLIPS"@en .
+
+Error: Invalid line: FIELD SLIPS"@en .
+Failed to parse line 41039. "Open-pit and underground
+
+Error: Invalid line: "Open-pit and underground
+Failed to parse line 41040. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 41042. "Rail depot
+
+Error: Invalid line: "Rail depot
+Failed to parse line 41043. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 41044. "Underground workings
+
+Error: Invalid line: "Underground workings
+Failed to parse line 41045. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 41046. "Wharf
+
+Error: Invalid line: "Wharf
+Failed to parse line 41047. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 41364. > ">"@en .
+
+Error: Invalid line: > ">"@en .
+Failed to parse line 41365. "<"@en .
+
+Error: Invalid line: "<"@en .
+Failed to parse line 48412. "
+
+Error: Invalid line: "
+Failed to parse line 48413. Primary Property Homes Ltd"@en .
+
+Error: Invalid line: Primary Property Homes Ltd"@en .
+Failed to parse line 49152. "OLD SERIES COUNTY 6" FIELD SLIPS"@en .
+
+Error: Invalid line: FIELD SLIPS"@en .
+Failed to parse line 49665. "Working is partly surface and partly underground, working the same bodies for the same commodities
+
+Error: Invalid line: "Working is partly surface and partly underground, working the same bodies for the same commodities
+Failed to parse line 49666. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 49670. "Sea, river or canal wharf where mineral commodities are unloaded and stored
+
+Error: Invalid line: "Sea, river or canal wharf where mineral commodities are unloaded and stored
+Failed to parse line 49671. "@en .
+
+Error: Invalid line: "@en .
+Failed to parse line 49988. > "Greater than"@en .
+
+Error: Invalid line: > "Greater than"@en .
+Failed to parse line 49989. "Less than"@en .
+
+Error: Invalid line: "Less than"@en .
+Failed to parse line 52027. "Dictionary of terms classifying the relationship between units separated by a "unit boundary"';"@en .
+
+Error: Invalid line: unit boundary"';"@en .
+Failed to parse line 52030. "Dictionary of the levels of certainty pertaining to an observed or interpreted geological feature shown on a geological map. GSD Notes: Dictionary very minimalist at present to meet immediate need. "Inferred" could be expanded later to specify various types or methods of inference"@en .
+
+Error: Invalid line: Inferred" could be expanded later to specify various types or methods of inference"@en .
+Failed to parse line 52063. ""Dictionary of business rule categories for BGS databases. This dictionary is used in the Technical Metadata system to indicate the a rule category (s) of the businnes rules defined against database objects (designs) predominantly within the oracle system."@en .
+
+Error: Invalid line: Dictionary of business rule categories for BGS databases. This dictionary is used in the Technical Metadata system to indicate the a rule category (s) of the businnes rules defined against database objects (designs) predominantly within the oracle system."@en .
+Total errors: 44
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..7050d38
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,52 @@
+services:
+ fuseki:
+ image: ghcr.io/kurrawong/fuseki-geosparql:git-main-e642d849
+ environment:
+ - JAVA_OPTS=-Xms512m -Xmx2048m
+ ports:
+ - 3030:3030
+ volumes:
+ - fuseki-data:/fuseki
+ profiles:
+ - db
+ - stack
+
+ prez:
+ image: ghcr.io/rdflib/prez:3.3.0
+ ports:
+ - 8000:8000
+ volumes:
+ - ./bgs-prefixes.ttl:/app/prez/reference_data/prefixes/bgs-prefixes.ttl
+ environment:
+ - SPARQL_ENDPOINT=http://fuseki:3030/bgs
+ depends_on:
+ - fuseki
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://fuseki:3030"]
+ interval: 5s
+ timeout: 10s
+ retries: 3
+ start_period: 20s
+ profiles:
+ - web
+ - stack
+
+ prez-ui:
+ image: ghcr.io/rdflib/prez-ui:3.2.3
+ ports:
+ - 8001:80
+ env_file: .env
+ depends_on:
+ - prez
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://prez:8000"]
+ interval: 5s
+ timeout: 10s
+ retries: 3
+ start_period: 30s
+ profiles:
+ - ui
+ - stack
+
+volumes:
+ fuseki-data:
diff --git a/earth_material_class.py b/earth_material_class.py
new file mode 100644
index 0000000..a6a645e
--- /dev/null
+++ b/earth_material_class.py
@@ -0,0 +1,77 @@
"""
Earth material class

Builds the BGS Earth Material Class SKOS vocabulary: parses the raw
N-Triples export, adds the concept scheme metadata, links every concept
to the scheme, records provenance, marks concepts without a broader
concept as top concepts, and serialises the result as Turtle.
"""

import time
from textwrap import dedent

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDF, SKOS

# Turtle snippet describing the concept scheme itself.
# NOTE(review): the angle-bracketed IRIs were stripped from this file when it
# was extracted. The standard prefix IRIs and the scheme IRI (recovered from
# the URIRef in main() below) have been restored; the objects of the
# commented-out properties were lost and must be restored before running.
concept_scheme_data = """
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

<http://data.bgs.ac.uk/ref/EarthMaterialClass>
    a skos:ConceptScheme ;
    # TODO(review): object IRIs lost in extraction -- restore before running:
    # dcterms:accessRights <...> ;
    dcterms:created "2023-07-28"^^xsd:date ;
    dcterms:modified "2023-07-28"^^xsd:date ;
    # dcterms:creator <...> ;
    skos:definition "The BGS Earth Material Class vocabulary."@en ;
    dcterms:identifier "http://data.bgs.ac.uk/ref/EarthMaterialClass"^^xsd:anyURI ;
    # dcterms:publisher <...> ;
    skos:prefLabel "Earth Material Class"@en ;
    skos:historyNote "BGS GitHub repository"@en ;
.
"""


def main() -> None:
    """Build the Earth Material Class vocabulary and write it as Turtle."""
    starttime = time.time()

    try:
        graph = Graph()
        graph.parse("vocabularies/earth-material-class.nt")
        graph.parse(data=concept_scheme_data)

        concept_scheme = URIRef("http://data.bgs.ac.uk/ref/EarthMaterialClass")
        concepts = list(graph.subjects(RDF.type, SKOS.Concept))

        # Link every concept to the scheme and record its provenance.
        for concept in concepts:
            graph.add((concept, SKOS.inScheme, concept_scheme))
            graph.add(
                (
                    concept,
                    SKOS.historyNote,
                    Literal("From the BGS vocabularies GitHub repository."),
                )
            )

        # Concepts with no broader concept (checked in both directions)
        # are the top concepts of the scheme.
        query = dedent(
            """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?concept
            WHERE {
                ?concept a skos:Concept .
                FILTER NOT EXISTS {
                    ?concept skos:broader ?parent_concept .
                }
                FILTER NOT EXISTS {
                    ?parent_concept skos:narrower ?concept .
                }
            }
            """
        ).strip()

        result = graph.query(query)

        for row in result:
            graph.add((concept_scheme, SKOS.hasTopConcept, row["concept"]))

        graph.serialize("vocabularies/earth-material-class.ttl", format="longturtle")

    finally:
        # Report elapsed time even when parsing or serialisation fails.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
diff --git a/geochronology.py b/geochronology.py
new file mode 100644
index 0000000..dfddf14
--- /dev/null
+++ b/geochronology.py
@@ -0,0 +1,77 @@
"""
Geochronology

Builds the BGS Geochronology SKOS vocabulary: parses the raw N-Triples
export, adds the concept scheme metadata, links every concept to the
scheme, records provenance, marks concepts without a broader concept as
top concepts, and serialises the result as Turtle.
"""

import time
from textwrap import dedent

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDF, SKOS

# Turtle snippet describing the concept scheme itself.
# NOTE(review): the angle-bracketed IRIs were stripped from this file when it
# was extracted. The standard prefix IRIs and the scheme IRI (recovered from
# the URIRef in main() below) have been restored; the objects of the
# commented-out properties were lost and must be restored before running.
concept_scheme_data = """
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

<http://data.bgs.ac.uk/ref/Geochronology>
    a skos:ConceptScheme ;
    # TODO(review): object IRIs lost in extraction -- restore before running:
    # dcterms:accessRights <...> ;
    dcterms:created "2023-07-28"^^xsd:date ;
    dcterms:modified "2023-07-28"^^xsd:date ;
    # dcterms:creator <...> ;
    skos:definition "The BGS Geochronology vocabulary."@en ;
    dcterms:identifier "http://data.bgs.ac.uk/ref/Geochronology"^^xsd:anyURI ;
    # dcterms:publisher <...> ;
    skos:prefLabel "Geochronology"@en ;
    skos:historyNote "BGS GitHub repository"@en ;
.
"""


def main() -> None:
    """Build the Geochronology vocabulary and write it as Turtle."""
    starttime = time.time()

    try:
        graph = Graph()
        graph.parse("vocabularies/geochronology.nt")
        graph.parse(data=concept_scheme_data)

        concept_scheme = URIRef("http://data.bgs.ac.uk/ref/Geochronology")
        concepts = list(graph.subjects(RDF.type, SKOS.Concept))

        # Link every concept to the scheme and record its provenance.
        for concept in concepts:
            graph.add((concept, SKOS.inScheme, concept_scheme))
            graph.add(
                (
                    concept,
                    SKOS.historyNote,
                    Literal("From the BGS vocabularies GitHub repository."),
                )
            )

        # Concepts with no broader concept (checked in both directions)
        # are the top concepts of the scheme.
        query = dedent(
            """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?concept
            WHERE {
                ?concept a skos:Concept .
                FILTER NOT EXISTS {
                    ?concept skos:broader ?parent_concept .
                }
                FILTER NOT EXISTS {
                    ?parent_concept skos:narrower ?concept .
                }
            }
            """
        ).strip()

        result = graph.query(query)

        for row in result:
            graph.add((concept_scheme, SKOS.hasTopConcept, row["concept"]))

        graph.serialize("vocabularies/geochronology.ttl", format="longturtle")

    finally:
        # Report elapsed time even when parsing or serialisation fails.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
diff --git a/lexicon_named_rock_unit.py b/lexicon_named_rock_unit.py
new file mode 100644
index 0000000..aefe587
--- /dev/null
+++ b/lexicon_named_rock_unit.py
@@ -0,0 +1,77 @@
"""
Lexicon Named Rock Unit

Builds the BGS Lexicon Named Rock Unit SKOS vocabulary: parses the raw
N-Triples export, adds the concept scheme metadata, links every concept
to the scheme, records provenance, marks concepts without a broader
concept as top concepts, and serialises the result as Turtle.
"""

import time
from textwrap import dedent

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDF, SKOS

# Turtle snippet describing the concept scheme itself.
# NOTE(review): the angle-bracketed IRIs were stripped from this file when it
# was extracted. The standard prefix IRIs and the scheme IRI (recovered from
# the URIRef in main() below) have been restored; the objects of the
# commented-out properties were lost and must be restored before running.
concept_scheme_data = """
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

<http://data.bgs.ac.uk/ref/Lexicon>
    a skos:ConceptScheme ;
    # TODO(review): object IRIs lost in extraction -- restore before running:
    # dcterms:accessRights <...> ;
    dcterms:created "2023-07-28"^^xsd:date ;
    dcterms:modified "2023-07-28"^^xsd:date ;
    # dcterms:creator <...> ;
    skos:definition "The BGS Lexicon Named Rock Unit vocabulary."@en ;
    dcterms:identifier "http://data.bgs.ac.uk/ref/Lexicon"^^xsd:anyURI ;
    # dcterms:publisher <...> ;
    skos:prefLabel "Lexicon Named Rock Unit"@en ;
    skos:historyNote "BGS GitHub repository"@en ;
.
"""


def main() -> None:
    """Build the Lexicon Named Rock Unit vocabulary and write it as Turtle."""
    starttime = time.time()

    try:
        graph = Graph()
        graph.parse("vocabularies/lexicon-named-rock-unit.nt")
        graph.parse(data=concept_scheme_data)

        concept_scheme = URIRef("http://data.bgs.ac.uk/ref/Lexicon")
        concepts = list(graph.subjects(RDF.type, SKOS.Concept))

        # Link every concept to the scheme and record its provenance.
        for concept in concepts:
            graph.add((concept, SKOS.inScheme, concept_scheme))
            graph.add(
                (
                    concept,
                    SKOS.historyNote,
                    Literal("From the BGS vocabularies GitHub repository."),
                )
            )

        # Concepts with no broader concept (checked in both directions)
        # are the top concepts of the scheme.
        query = dedent(
            """
            PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
            SELECT DISTINCT ?concept
            WHERE {
                ?concept a skos:Concept .
                FILTER NOT EXISTS {
                    ?concept skos:broader ?parent_concept .
                }
                FILTER NOT EXISTS {
                    ?parent_concept skos:narrower ?concept .
                }
            }
            """
        ).strip()

        result = graph.query(query)

        for row in result:
            graph.add((concept_scheme, SKOS.hasTopConcept, row["concept"]))

        graph.serialize("vocabularies/lexicon-named-rock-unit.ttl", format="longturtle")

    finally:
        # Report elapsed time even when parsing or serialisation fails.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..db1029b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+https://github.com/Kurrawong/kurrawong-python/archive/refs/tags/0.3.2.zip
+pyshacl>=0.23.0,<1.0.0
+rdflib>=6.3.2,<7.0.0
\ No newline at end of file
diff --git a/thesaurus_vocpub.py b/thesaurus_vocpub.py
new file mode 100644
index 0000000..90747d3
--- /dev/null
+++ b/thesaurus_vocpub.py
@@ -0,0 +1,62 @@
"""
Add triples to pass VocPub validator.

Patches the BGS Geoscience Thesaurus so that it conforms to the VocPub
SHACL profile: repairs the top-concept link, normalises skos:inScheme,
backfills missing skos:definition values from skos:prefLabel, and hangs
orphan concepts under the thesaurus' single top concept.
"""

import time

from rdflib import Graph, URIRef
from rdflib.namespace import RDF, SKOS


def main() -> None:
    """Patch the thesaurus for VocPub conformance and serialise as Turtle."""
    starttime = time.time()

    try:
        graph = Graph()
        graph.parse("vocabularies/thesaurus_metadata.ttl")
        graph.parse("vocabularies/thesaurus.nt", format="ntriples")

        concepts = graph.subjects(RDF.type, SKOS.Concept)
        # The single scheme declared in thesaurus_metadata.ttl.
        concept_scheme = graph.value(None, RDF.type, SKOS.ConceptScheme)
        top_concept = URIRef("http://data.bgs.ac.uk/id/GeoscienceThesaurus/Concept/0")

        # Fix top concept pointing to concept scheme with trailing slash.
        graph.remove(
            (
                URIRef("http://data.bgs.ac.uk/ref/GeoscienceThesaurus/"),
                SKOS.hasTopConcept,
                top_concept,
            )
        )
        graph.add((concept_scheme, SKOS.hasTopConcept, top_concept))

        for concept in concepts:
            # Point every concept at the canonical scheme IRI.
            graph.remove((concept, SKOS.inScheme, None))
            graph.add((concept, SKOS.inScheme, concept_scheme))

            # VocPub requires a definition; fall back to the pref label.
            if graph.value(concept, SKOS.definition, None) is None:
                label = graph.value(concept, SKOS.prefLabel, None)
                # NOTE(review): label is None when a concept has no
                # prefLabel, and adding None raises in rdflib -- confirm
                # every concept in thesaurus.nt is labelled.
                graph.add((concept, SKOS.definition, label))

            # Orphan concepts (no broader link in either direction) are
            # hung under the designated top concept.
            if (
                graph.value(concept, SKOS.broader, None) is None
                and graph.value(None, SKOS.narrower, concept) is None
            ):
                graph.add(
                    (
                        top_concept,
                        SKOS.narrower,
                        concept,
                    )
                )
                graph.add((concept, SKOS.broader, top_concept))

        graph.serialize("vocabularies/thesaurus.ttl", format="longturtle")

    finally:
        # Report elapsed time even when parsing or serialisation fails.
        endtime = time.time() - starttime
        print(f"Completed in {endtime:0.2f} seconds")


if __name__ == "__main__":
    main()
diff --git a/validate_dictionaries_nt.py b/validate_dictionaries_nt.py
new file mode 100644
index 0000000..a90c1f9
--- /dev/null
+++ b/validate_dictionaries_nt.py
@@ -0,0 +1,21 @@
"""
Validate dictionaries.nt line by line.

Parses each line of dictionaries.nt as a single N-Triples statement so
malformed lines can be reported individually, accumulates the parseable
triples into one graph, serialises that graph as Turtle, and prints a
report of every line that failed to parse.
"""

from rdflib import Graph

graph = Graph()

errors = []

with open("dictionaries.nt", "r", encoding="utf-8") as file:
    # Iterate the file lazily instead of materialising every line with
    # readlines(); enumerate(..., start=1) gives 1-based line numbers.
    for lineno, line in enumerate(file, start=1):
        try:
            # Each well-formed N-Triples line is a complete statement,
            # so it can be parsed independently into the shared graph.
            graph.parse(data=line, format="ntriples")
        except Exception as err:
            errors.append(f"Failed to parse line {lineno}. {line}\nError: {err}")

graph.serialize("dictionaries.ttl", format="longturtle")

if errors:
    for error in errors:
        print(error)

    print(f"Total errors: {len(errors)}")