From 3afba5d0bb3b17eda3e82c8f95f9b7a38ff6f758 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Sat, 9 Sep 2023 14:41:13 +0200 Subject: [PATCH 01/60] realized we need the data for the ingest but moving the log elsewhere --- .gitignore | 2 +- src/py/debug-logconf.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index ca23aec..d9edd80 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ .env -data/ +*.log __pycache__ \ No newline at end of file diff --git a/src/py/debug-logconf.yml b/src/py/debug-logconf.yml index baac606..9884ee3 100644 --- a/src/py/debug-logconf.yml +++ b/src/py/debug-logconf.yml @@ -15,8 +15,8 @@ handlers: class: logging.FileHandler level: DEBUG formatter: base - filename: ../../data/lwua-ingest-debug.log - mode: 'w' # in stead of 'a' + filename: lwua-ingest-debug.log + mode: 'a' # in stead of 'w' loggers: __main__: level: DEBUG From 342d63e4ce296db7674d36a141fd733e688be04a Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Sun, 10 Sep 2023 13:43:05 +0200 Subject: [PATCH 02/60] docker image builds python using poetry reverses symlink direction to make docker happy (and the other usage satisfied) applies the poetry build inside docker inage build to fix #1 --- docker/lwua-ingest/Dockerfile | 39 ++++++++++++++----- .../lwua-ingest/lwua-py}/debug-logconf.yml | 0 .../lwua-ingest/lwua-py}/lwua/__init__.py | 0 .../lwua-ingest/lwua-py}/lwua/__main__.py | 0 .../lwua-ingest/lwua-py}/lwua/daemon.py | 0 .../lwua-ingest/lwua-py}/lwua/helpers.py | 0 .../lwua-ingest/lwua-py}/lwua/ingest.py | 0 .../lwua-ingest/lwua-py}/lwua/schedule.py | 5 ++- .../lwua-ingest/lwua-py}/poetry.lock | 0 .../lwua-ingest/lwua-py}/pyproject.toml | 1 - docker/lwua-ingest/lwua_ingest | 1 - src/py/ipynb | 1 + src/py/lwua-py | 1 + 13 files changed, 35 insertions(+), 13 deletions(-) rename {src/py => docker/lwua-ingest/lwua-py}/debug-logconf.yml (100%) rename {src/py => docker/lwua-ingest/lwua-py}/lwua/__init__.py (100%) rename {src/py => docker/lwua-ingest/lwua-py}/lwua/__main__.py (100%) rename {src/py => docker/lwua-ingest/lwua-py}/lwua/daemon.py (100%) rename {src/py => docker/lwua-ingest/lwua-py}/lwua/helpers.py (100%) rename {src/py => docker/lwua-ingest/lwua-py}/lwua/ingest.py (100%) rename {src/py => docker/lwua-ingest/lwua-py}/lwua/schedule.py (85%) rename {src/py => docker/lwua-ingest/lwua-py}/poetry.lock (100%) rename {src/py => docker/lwua-ingest/lwua-py}/pyproject.toml (94%) delete mode 120000 docker/lwua-ingest/lwua_ingest create mode 120000 src/py/ipynb create mode 120000 src/py/lwua-py diff --git a/docker/lwua-ingest/Dockerfile b/docker/lwua-ingest/Dockerfile index 24544d4..c81174c 100644 --- a/docker/lwua-ingest/Dockerfile +++ b/docker/lwua-ingest/Dockerfile @@ -1,13 +1,34 @@ -FROM python:3.10-buster +FROM python:3.10-slim as python +ENV PYTHONUNBUFFERED=true +WORKDIR /lwua-py -COPY . 
/lwua -WORKDIR /lwua +# check for inspiration on including poetry +# - https://hub.docker.com/r/airdock/python-poetry +# - https://binx.io/nl/2022/06/13/poetry-docker/ -# TODO COPY and build the lwua python module -# but rather swithch to using poetry -- mpo has pattern for making it available inside docker -#RUN python -m pip install --upgrade pip && \ -# pip install --no-cache-dir -r requirements.txt && \ -# python setup.py install +# create an image versioin point where poetry and its dependencies are available +# and use that to build the python package locally +FROM python as poetry +# gcc needed in the build of many python dependencies +# removed from python-slim for size trimming - but have to re-add here +RUN apt-get update -y && apt-get upgrade -y && apt-get install -y gcc +ENV POETRY_HOME=/opt/poetry +ENV POETRY_VIRTUALENVS_IN_PROJECT=true +ENV PATH="$POETRY_HOME/bin:$PATH" +RUN python -c 'from urllib.request import urlopen; print(urlopen("https://install.python-poetry.org").read().decode())' | python - -ENTRYPOINT ["./entrypoint.sh"] +# get the source code in +COPY ./lwua-py ./ +# use poetry to build and install -- creating the local .venv +RUN poetry install --no-interaction --no-ansi -vvv + + +# now go back to the original slim image to build the runtime image +# and just grab the build env from the intermediate stage +FROM python as runtime +# ensure the .venv python is used +ENV PATH="/lwua-py/.venv/bin:$PATH" +# ensure we have the build folder from the poetry stage of this image +COPY --from=poetry /lwua-py /lwua-py +ENTRYPOINT ["python", "-m", "lwua"] diff --git a/src/py/debug-logconf.yml b/docker/lwua-ingest/lwua-py/debug-logconf.yml similarity index 100% rename from src/py/debug-logconf.yml rename to docker/lwua-ingest/lwua-py/debug-logconf.yml diff --git a/src/py/lwua/__init__.py b/docker/lwua-ingest/lwua-py/lwua/__init__.py similarity index 100% rename from src/py/lwua/__init__.py rename to docker/lwua-ingest/lwua-py/lwua/__init__.py diff --git a/src/py/lwua/__main__.py b/docker/lwua-ingest/lwua-py/lwua/__main__.py similarity index 100% rename from src/py/lwua/__main__.py rename to docker/lwua-ingest/lwua-py/lwua/__main__.py diff --git a/src/py/lwua/daemon.py b/docker/lwua-ingest/lwua-py/lwua/daemon.py similarity index 100% rename from src/py/lwua/daemon.py rename to docker/lwua-ingest/lwua-py/lwua/daemon.py diff --git a/src/py/lwua/helpers.py b/docker/lwua-ingest/lwua-py/lwua/helpers.py similarity index 100% rename from src/py/lwua/helpers.py rename to docker/lwua-ingest/lwua-py/lwua/helpers.py diff --git a/src/py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py similarity index 100% rename from src/py/lwua/ingest.py rename to docker/lwua-ingest/lwua-py/lwua/ingest.py diff --git a/src/py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py similarity index 85% rename from src/py/lwua/schedule.py rename to docker/lwua-ingest/lwua-py/lwua/schedule.py index 3593fd3..70b3e12 100644 --- a/src/py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -19,8 +19,9 @@ def main_schedule(): class LWUAScheduler(BlockingScheduler): def __init__(self, run_on_start: bool = True): - # timeprops: dict = dict(minutes=30) - timeprops: dict = dict(seconds=5) + # todo consider injecting interval through .env + timeprops: dict = dict(minutes=30) + # timeprops: dict = dict(seconds=5) super().__init__() self._run_on_start = run_on_start self.add_job(lambda: main_schedule(), 'interval', **timeprops) diff --git a/src/py/poetry.lock 
b/docker/lwua-ingest/lwua-py/poetry.lock similarity index 100% rename from src/py/poetry.lock rename to docker/lwua-ingest/lwua-py/poetry.lock diff --git a/src/py/pyproject.toml b/docker/lwua-ingest/lwua-py/pyproject.toml similarity index 94% rename from src/py/pyproject.toml rename to docker/lwua-ingest/lwua-py/pyproject.toml index 852f8d9..98e482d 100644 --- a/src/py/pyproject.toml +++ b/docker/lwua-ingest/lwua-py/pyproject.toml @@ -4,7 +4,6 @@ version = "0.0.0" description = "lifewatch user analysis" authors = ["Laurian Van Maldeghem "] license = "CC0" -readme = "README.md" [tool.poetry.dependencies] python = "^3.10" diff --git a/docker/lwua-ingest/lwua_ingest b/docker/lwua-ingest/lwua_ingest deleted file mode 120000 index 1420a01..0000000 --- a/docker/lwua-ingest/lwua_ingest +++ /dev/null @@ -1 +0,0 @@ -../../src/py/lwua_ingest/ \ No newline at end of file diff --git a/src/py/ipynb b/src/py/ipynb new file mode 120000 index 0000000..da8215f --- /dev/null +++ b/src/py/ipynb @@ -0,0 +1 @@ +../../docker/jupyter/notebooks/ \ No newline at end of file diff --git a/src/py/lwua-py b/src/py/lwua-py new file mode 120000 index 0000000..6637b53 --- /dev/null +++ b/src/py/lwua-py @@ -0,0 +1 @@ +../../docker/lwua-ingest/lwua-py/ \ No newline at end of file From 3a3c16991df8a5b5271e1b0966ae902186908b64 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Sun, 10 Sep 2023 13:44:28 +0200 Subject: [PATCH 03/60] apply image names further enhances the request from #3 some path changes allong the way (conform the reeversed symlinks) --- docker/docker-compose.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 2b001ed..36881a4 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -4,22 +4,25 @@ services: jupyter: build: context: ./jupyter + image: lwua/lwua_jupyter container_name: lwua_jupyter ports: - "8888:8888" links: - graphdb volumes: - - "../src/py/ipynb:/notebooks" + - "./notebooks:/notebooks" labels: be.vliz.container.project: "LWUA" be.vliz.container.group: "services" graphdb: + # todo consider a local build that also initializes a repository for lwua inside this lwua_graphdb! image: ontotext/graphdb:10.0.2 container_name: lwua_graphdb ports: - 7200:7200 # HTTP + # todo - think about volumes for persistence of data labels: be.vliz.container.project: "LWUA" be.vliz.container.group: "services" @@ -28,6 +31,7 @@ services: build: context: ./lwua-ingest/ #args: + image: lwua/lwua_ingest container_name: lwua_ingest volumes: - ../data:/data # Store for any input data @@ -39,6 +43,8 @@ services: # towards deploy -- make restart and keep service running -- consequence: use ctrl-c to stop # restart: unless-stopped # command: start + links: + - graphdb logging: driver: json-file options: From 8d6bc1a8e3fd867135367d714030ad7132399ff4 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Sun, 10 Sep 2023 16:25:59 +0200 Subject: [PATCH 04/60] getting the graphdb to work together with the sparqlwrapper this marks an important milestone for #4 as we can now insert triples! 
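For reference, a minimal standalone sketch of the insert flow this patch introduces; the endpoint URL, repository id, and graph name are assumptions taken from the defaults used elsewhere in this series:

```python
from SPARQLWrapper import SPARQLWrapper, JSON

# assumed defaults: GraphDB on localhost:7200 with a repository named "lwua23"
endpoint = "http://localhost:7200/repositories/lwua23"
gdb = SPARQLWrapper(
    endpoint=endpoint,
    updateEndpoint=endpoint + "/statements",  # GraphDB handles updates on /statements
    returnFormat=JSON,
    agent="lwua-python-sparql-client",
)
gdb.method = "POST"

# insert a single triple into a (hypothetical) named graph
gdb.setQuery("""
    INSERT DATA { GRAPH <urn:lwua:demo> {
        <https://example.org> <https://schema.org/name> "The example website" .
    } }
""")
gdb.queryType = "INSERT"  # make the wrapper use the update endpoint
gdb.query()
```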
--- docker/lwua-ingest/lwua-py/lwua/ingest.py | 57 ++++++++++++++++++----- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 5eba066..0422810 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -9,22 +9,57 @@ def run_ingest(): log.info("run_ingest") -def temp_code_testing(): - hostname = "localhost" - port = 7200 +def gdb_from_config(insert: bool = False): + # todo get base and repoid from .env + base = "http://localhost:7200" repoid = "lwua23" - sparql = SPARQLWrapper(f"http://{ hostname }:{ port }/repositories/{ repoid }") + endpoint = f"{ base }/repositories/{ repoid }" + updateEndpoint = endpoint + "/statements" # update statements are handled at other endpoint - file = "data/project.ttl" - # convert the file to n3 + gdb = SPARQLWrapper( + endpoint=endpoint, + updateEndpoint=updateEndpoint, + returnFormat='json', + agent="lwua-python-sparql-client" + ) + gdb.method = 'POST' + return gdb + + +def ingest_testing(): + + file = "../../data/project.ttl" + # todo convert the file to n3 + # for now hardcoded sample: n3str = """ - . + . """ + + # define a graph -- todo find better id graphid = f"https://example.org/lwua23/{ file }" - inserts = f"INSERT DATA {{ GRAPH <{ graphid }> { n3str } }}" + # assemble the insert statement + inserts = f"INSERT DATA {{ GRAPH <{ graphid }> {{ { n3str } }} }}" + print(inserts) - sparql.setQuery(inserts) - sparql.setReturnFormat(JSON) - results = sparql.query().convert() + gdb = gdb_from_config() + gdb.setQuery(inserts) + gdb.queryType = 'INSERT' # important to indicate that the update Endpoint should be used + + results = gdb.query().convert() + print(results) return results + + +def main(): + logging.basicConfig() + logging.getLogger().setLevel(logging.DEBUG) + reqlog = logging.getLogger('requests.packages.urllib3') + reqlog.setLevel(logging.DEBUG) + reqlog.propagate = True + + ingest_testing() + + +if __name__ == '__main__': + main() From d39c31455f4506dfd43486e7b0573e6108f31dc4 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Sun, 10 Sep 2023 16:26:56 +0200 Subject: [PATCH 05/60] minor cleanup --- docker/lwua-ingest/lwua-py/lwua/ingest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 0422810..22c0c46 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -9,7 +9,7 @@ def run_ingest(): log.info("run_ingest") -def gdb_from_config(insert: bool = False): +def gdb_from_config(): # todo get base and repoid from .env base = "http://localhost:7200" repoid = "lwua23" @@ -20,7 +20,7 @@ def gdb_from_config(insert: bool = False): gdb = SPARQLWrapper( endpoint=endpoint, updateEndpoint=updateEndpoint, - returnFormat='json', + returnFormat=JSON, agent="lwua-python-sparql-client" ) gdb.method = 'POST' @@ -35,7 +35,7 @@ def ingest_testing(): n3str = """ . 
""" - + # define a graph -- todo find better id graphid = f"https://example.org/lwua23/{ file }" # assemble the insert statement From c2d53c54b1952ef5b82d2b5ddd7531fded476afb Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Mon, 11 Sep 2023 13:40:18 +0200 Subject: [PATCH 06/60] create a local graphd-db image that initializes the database --- docker/graphdb/Dockerfile | 7 ++++ docker/graphdb/initdb/init_graphdb.sh | 46 +++++++++++++++++++++++++++ docker/graphdb/initdb/lwua23-repo.ttl | 42 ++++++++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 docker/graphdb/Dockerfile create mode 100755 docker/graphdb/initdb/init_graphdb.sh create mode 100644 docker/graphdb/initdb/lwua23-repo.ttl diff --git a/docker/graphdb/Dockerfile b/docker/graphdb/Dockerfile new file mode 100644 index 0000000..21e8fa6 --- /dev/null +++ b/docker/graphdb/Dockerfile @@ -0,0 +1,7 @@ +FROM ontotext/graphdb:10.0.2 + +RUN mkdir -p /root/graphdb-import/data +WORKDIR /root/graphdb-import/data + +COPY ./initdb +RUN cd /initdb && ./init_graphdb.sh \ No newline at end of file diff --git a/docker/graphdb/initdb/init_graphdb.sh b/docker/graphdb/initdb/init_graphdb.sh new file mode 100755 index 0000000..76c1989 --- /dev/null +++ b/docker/graphdb/initdb/init_graphdb.sh @@ -0,0 +1,46 @@ +#!/bin/bash -ex + +GDBPIDF="/tmp/init-graphdb-serv.pid" +GDBOUTF="/tmp/init-graphdb-out.txt" + +start_graphdb(){ + rm -f ${GDBPIDF} + graphdb -s -p ${GDBPIDF} >${GDBOUTF} 2>&1 & + sleep 1 +} + +wait_graphdb(){ + count=0 + while ! nc -z localhost 7200; do + count=$((count+1)) + if [ $count -gt 1000 ]; then + return + fi + # else + sleep 0.1 # wait for 1/10 of the second before check again + done +} + +stop_graphdb(){ + kill -9 $(cat ${GDBPIDF}) + sleep 1 + rm -f ${GDBPIDF} + rm -f ${GDBOUTF} +} + +createdb() { + curl -X POST http://localhost:7200/rest/repositories -H 'Content-Type: multipart/form-data' -F config=@lwua23-repo.ttl +} + + +# one could do it like this +#start_graphdb +#wait_graphdb +#createdb +#wait_configdb +#stop_graphdb + +# but actually this just works too: +REPODIR="/opt/graphdb/home/data/repositories/lwua23" +mkdir -p ${REPODIR} +cp ./lwua23-repo.ttl ${REPODIR}/config.ttl diff --git a/docker/graphdb/initdb/lwua23-repo.ttl b/docker/graphdb/initdb/lwua23-repo.ttl new file mode 100644 index 0000000..98133fd --- /dev/null +++ b/docker/graphdb/initdb/lwua23-repo.ttl @@ -0,0 +1,42 @@ +# +# RDF4J configuration template for a GraphDB repository +# +@prefix rdfs: . +@prefix rep: . +@prefix sr: . +@prefix sail: . +@prefix graphdb: . 
+ +# [] a rep:Repository ; # note <-- the config downloaded is represented as a blank node +<#lwua23> a rep:Repository ; # in the graphdb data folder it can just be a local fragment identifier + rep:repositoryID "lwua23" ; + rdfs:label "lifewatch user analysis 2023" ; + rep:repositoryImpl [ + rep:repositoryType "graphdb:SailRepository" ; + sr:sailImpl [ + sail:sailType "graphdb:Sail" ; + + graphdb:base-URL "http://example.org/owlim#" ; + graphdb:defaultNS "" ; + graphdb:entity-index-size "10000000" ; + graphdb:entity-id-size "32" ; + graphdb:imports "" ; + graphdb:repository-type "file-repository" ; + graphdb:ruleset "empty"; + graphdb:storage-folder "storage" ; + + graphdb:enable-context-index "false" ; + + graphdb:enablePredicateList "true" ; + + graphdb:in-memory-literal-properties "true" ; + graphdb:enable-literal-index "true" ; + + graphdb:check-for-inconsistencies "false" ; + graphdb:disable-sameAs "true" ; + graphdb:query-timeout "0" ; + graphdb:query-limit-results "0" ; + graphdb:throw-QueryEvaluationException-on-timeout "false" ; + graphdb:read-only "false" ; + ] + ]. From 97586d3e39160c7198af7322c88bd89b1abb1077 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Mon, 11 Sep 2023 13:44:42 +0200 Subject: [PATCH 07/60] introduce the notebooks so they become available in the jupyter --- .gitignore | 3 +- .../jupyter/notebooks/01-basic-sparql.ipynb | 35 +++++++++++++++++++ docker/jupyter/notebooks/Untiltled.ipynb | 0 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 docker/jupyter/notebooks/01-basic-sparql.ipynb create mode 100644 docker/jupyter/notebooks/Untiltled.ipynb diff --git a/.gitignore b/.gitignore index d9edd80..bc1ee08 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .env *.log -__pycache__ \ No newline at end of file +__pycache__ +.ipynb_checkpoints/ \ No newline at end of file diff --git a/docker/jupyter/notebooks/01-basic-sparql.ipynb b/docker/jupyter/notebooks/01-basic-sparql.ipynb new file mode 100644 index 0000000..1f19aee --- /dev/null +++ b/docker/jupyter/notebooks/01-basic-sparql.ipynb @@ -0,0 +1,35 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "0586dd76-16e3-4da3-944b-7b4c22496f8a", + "metadata": {}, + "outputs": [], + "source": [ + "from pykg2tbl import KGSource" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docker/jupyter/notebooks/Untiltled.ipynb b/docker/jupyter/notebooks/Untiltled.ipynb new file mode 100644 index 0000000..e69de29 From e0eefa20c666d8f3a42b4ecf4ccc573956ee156c Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Mon, 11 Sep 2023 13:45:36 +0200 Subject: [PATCH 08/60] use the new feaures of the jupyter and graphdb images --- docker/docker-compose.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 36881a4..abf2905 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -11,17 +11,21 @@ services: links: - graphdb volumes: - - "./notebooks:/notebooks" + - "./jupyter/notebooks:/notebooks" labels: be.vliz.container.project: "LWUA" be.vliz.container.group: "services" graphdb: # todo consider a 
local build that also initializes a repository for lwua inside this lwua_graphdb! - image: ontotext/graphdb:10.0.2 + build: + context: ./graphdb + image: lwua/lwua_graphdb container_name: lwua_graphdb ports: - 7200:7200 # HTTP + volumes: + - ../data:/root/graphdb-import/data # todo - think about volumes for persistence of data labels: be.vliz.container.project: "LWUA" From 30bd04fb128272888731c21af0a213c56b754a30 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Mon, 11 Sep 2023 13:48:15 +0200 Subject: [PATCH 09/60] cleanup not needed test script --- docker/lwua-ingest/entrypoint.sh | 3 --- 1 file changed, 3 deletions(-) delete mode 100755 docker/lwua-ingest/entrypoint.sh diff --git a/docker/lwua-ingest/entrypoint.sh b/docker/lwua-ingest/entrypoint.sh deleted file mode 100755 index 5d19541..0000000 --- a/docker/lwua-ingest/entrypoint.sh +++ /dev/null @@ -1,3 +0,0 @@ -#! /usr/bin/env bash - -python --version \ No newline at end of file From a9b85af814c07e65b7164cc4274226d9f62bf47a Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Mon, 11 Sep 2023 18:17:37 +0200 Subject: [PATCH 10/60] ingest of file succeeded completes the goal of "test driving the ingest of rdf into graphdb" fixes #4 --- docker/lwua-ingest/lwua-py/lwua/ingest.py | 103 ++++++++++++++++------ 1 file changed, 76 insertions(+), 27 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 22c0c46..226a224 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -1,22 +1,40 @@ from SPARQLWrapper import SPARQLWrapper, JSON +from pathlib import Path import logging +from rdflib import Graph +import os +from dotenv import load_dotenv log = logging.getLogger(__name__) def run_ingest(): - log.info("run_ingest") + data_path = data_path_from_config() + log.info(f"run_ingest on updated files in {data_path}") + # TODO -- immplement steps + # list all the contents (files) in data_path together with last mod + # get the <#admin-luwa-ingest> graph listing the maintained named-graphs and their lastmod + # there nees to be a mapping between filenames and named-graphs ! + # check which filenames are younger then their named-graph equivalent + # read them into mem - replace the coresponding named-graph in the repo + # update the triple for the named-graph to lastmod in the admin grap + + +def data_path_from_config(): + folder_name = os.getenv("GDB_DATA_FOLDER", "/root/graphdb-import/data") + return Path(folder_name).absolute() def gdb_from_config(): - # todo get base and repoid from .env - base = "http://localhost:7200" - repoid = "lwua23" + base = os.getenv("GDB_BASE", "http://localhost:7200") + repoid = os.getenv("GDB_REPO", "lwua23") endpoint = f"{ base }/repositories/{ repoid }" updateEndpoint = endpoint + "/statements" # update statements are handled at other endpoint + log.debug(f"using endpoint {endpoint}") + gdb = SPARQLWrapper( endpoint=endpoint, updateEndpoint=updateEndpoint, @@ -27,38 +45,69 @@ def gdb_from_config(): return gdb -def ingest_testing(): +def ingest_graph(graph, gname: str = None, replace: bool = False): + log.debug(f"to insert data into <{ gname }>") - file = "../../data/project.ttl" - # todo convert the file to n3 - # for now hardcoded sample: - n3str = """ - . 
- """ + gdb = gdb_from_config() - # define a graph -- todo find better id - graphid = f"https://example.org/lwua23/{ file }" - # assemble the insert statement - inserts = f"INSERT DATA {{ GRAPH <{ graphid }> {{ { n3str } }} }}" - print(inserts) + # do the cleanup if possible + if replace and gname is not None: + pass # TODO execute delete of full graph -- have to check syntax + + # extract the triples and format the insert statement + ntstr = graph.serialize(format="nt") + log.debug(f"extracted tiples == { ntstr }") + if gname is not None: + inserts = f"INSERT DATA {{ GRAPH <{ gname }> {{ { ntstr } }} }}" + else: + inserts = f"INSERT DATA {{ { ntstr} }}" + log.debug(f"INSERT of ==> { inserts }") - gdb = gdb_from_config() gdb.setQuery(inserts) + log.debug(f"detected querytype == {gdb.queryType}") + + # unsure if this is needed -- can sqlwrapper detect this automatically? gdb.queryType = 'INSERT' # important to indicate that the update Endpoint should be used - results = gdb.query().convert() - print(results) - return results + gdb.query() -def main(): - logging.basicConfig() - logging.getLogger().setLevel(logging.DEBUG) - reqlog = logging.getLogger('requests.packages.urllib3') - reqlog.setLevel(logging.DEBUG) - reqlog.propagate = True +def fname_2_gname(fname): + return f"urn:lwua:data/{fname}" # TODO maybe consider something else? + - ingest_testing() +def suffix_2_format(suffix): + if suffix in ["ttl", "turtle"]: + return "turtle" + if suffix in ["jsonld", "json"]: + return "json-ld" + # todo consider others if needed + return None + + +def read_graph(fpath: Path, format: str = None): + format = format or suffix_2_format(fpath.suffix) + graph = Graph().parse(location=str(fpath), format=format) + return graph + + +def ingest_data_file(fname): + file_path = data_path_from_config() / fname + assert file_path.exists(), f"cannot ingest file at {file_path}" + + gname = fname_2_gname(fname) + graph = read_graph(file_path) + + ingest_graph(graph, gname=gname) + + +# Note: this main method allows to locally test outside docker +# directly connecting to a localhost graphdb endpoint (which might be inside docker!) +def main(): + load_dotenv() + ingest_data_file("project.ttl") + # todo + # run_ingest() if __name__ == '__main__': From 417766c3817084859fadaa05319a2b4a1643db90 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:12:17 +0200 Subject: [PATCH 11/60] prefer https for schema.org --- data/project.ttl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/project.ttl b/data/project.ttl index 291f7fe..bf11489 100644 --- a/data/project.ttl +++ b/data/project.ttl @@ -1,4 +1,4 @@ -@prefix schema: . +@prefix schema: . @prefix xsd: . @prefix cc: . From c71976bb2e7a5ba4f2cd0521806dfa5513283334 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:12:58 +0200 Subject: [PATCH 12/60] rename docker/info script, introducing jq and some enhancements --- docker/dlwua-info.sh | 6 ------ docker/info.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) delete mode 100755 docker/dlwua-info.sh create mode 100755 docker/info.sh diff --git a/docker/dlwua-info.sh b/docker/dlwua-info.sh deleted file mode 100755 index ae8b37b..0000000 --- a/docker/dlwua-info.sh +++ /dev/null @@ -1,6 +0,0 @@ -#! 
/usr/bin/env bash - -for dn in $(docker ps --format {{.Names}} | grep lwua_); do - echo -e ">> $dn >>\n" - docker inspect $dn | grep com.docker.compose.project -done \ No newline at end of file diff --git a/docker/info.sh b/docker/info.sh new file mode 100755 index 0000000..fb27094 --- /dev/null +++ b/docker/info.sh @@ -0,0 +1,27 @@ +#! /usr/bin/env bash + +echo "info on docker processes in this project" + +for dn in $(docker ps --format {{.Names}} | grep lwua_); do + echo -e ">> $dn >>" + echo " open shell with > docker exec -it ${dn} /bin/bash" + + # get the docker inspect json and parse it using jq + djson=$(docker inspect $dn) + + id=$(echo $djson | jq '.[].Id') + echo " id == ${id}" + + dir=$(echo $djson | jq '.[].Config.Labels. "com.docker.compose.project.working_dir"') + echo " dir == ${dir}" + + echo + +done + + +# todo +#- list logging output folders +#- list http entrypoints for browser ? open ports? +#- check if jq dependency is installed and advise to do so +#- check if config path matches location of this script! --> would indicate services are running from different location! \ No newline at end of file From d9f8ce12b6b6bfdea446090147bb5c0fa8fcf140 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:13:26 +0200 Subject: [PATCH 13/60] fix error in copy statement (2nd arg required) --- docker/graphdb/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/graphdb/Dockerfile b/docker/graphdb/Dockerfile index 21e8fa6..d860f87 100644 --- a/docker/graphdb/Dockerfile +++ b/docker/graphdb/Dockerfile @@ -3,5 +3,5 @@ FROM ontotext/graphdb:10.0.2 RUN mkdir -p /root/graphdb-import/data WORKDIR /root/graphdb-import/data -COPY ./initdb +COPY ./initdb /initdb RUN cd /initdb && ./init_graphdb.sh \ No newline at end of file From edeb104037e02b1b5f4f22e643635b86023a7f64 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:14:35 +0200 Subject: [PATCH 14/60] introduce external shared logging volume --- docker/docker-compose.yml | 1 + docker/lwua-ingest/Dockerfile | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index abf2905..e56ff38 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -39,6 +39,7 @@ services: container_name: lwua_ingest volumes: - ../data:/data # Store for any input data + - ../logging:/logging # Store for any input data env_file: - ../.env # for test / dev -- no restart and single run diff --git a/docker/lwua-ingest/Dockerfile b/docker/lwua-ingest/Dockerfile index c81174c..e77f3de 100644 --- a/docker/lwua-ingest/Dockerfile +++ b/docker/lwua-ingest/Dockerfile @@ -19,7 +19,7 @@ ENV PATH="$POETRY_HOME/bin:$PATH" RUN python -c 'from urllib.request import urlopen; print(urlopen("https://install.python-poetry.org").read().decode())' | python - # get the source code in -COPY ./lwua-py ./ +COPY ./lwua-py /lwua-py # use poetry to build and install -- creating the local .venv RUN poetry install --no-interaction --no-ansi -vvv @@ -31,4 +31,8 @@ FROM python as runtime ENV PATH="/lwua-py/.venv/bin:$PATH" # ensure we have the build folder from the poetry stage of this image COPY --from=poetry /lwua-py /lwua-py + +VOLUME /data +VOLUME /logging +RUN ln -s /logging /lwua-py/logging ENTRYPOINT ["python", "-m", "lwua"] From b897e682f71920aef41e69d2dcd22230302ca09c Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:15:06 +0200 Subject: [PATCH 15/60] updated deps --- docker/lwua-ingest/lwua-py/poetry.lock | 42 
+++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/docker/lwua-ingest/lwua-py/poetry.lock b/docker/lwua-ingest/lwua-py/poetry.lock index 6e2988c..82c8400 100644 --- a/docker/lwua-ingest/lwua-py/poetry.lock +++ b/docker/lwua-ingest/lwua-py/poetry.lock @@ -249,7 +249,47 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] +[[package]] +name = "watchdog" +version = "3.0.0" +description = "Filesystem events monitoring" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397"}, + {file = "watchdog-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9"}, + {file = "watchdog-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7"}, + {file = "watchdog-3.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc"}, + {file = "watchdog-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0"}, + {file = "watchdog-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8"}, + {file = "watchdog-3.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100"}, + {file = "watchdog-3.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346"}, + {file = "watchdog-3.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64.whl", hash = 
"sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d"}, + {file = "watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33"}, + {file = "watchdog-3.0.0-py3-none-win32.whl", hash = "sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f"}, + {file = "watchdog-3.0.0-py3-none-win_amd64.whl", hash = "sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c"}, + {file = "watchdog-3.0.0-py3-none-win_ia64.whl", hash = "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759"}, + {file = "watchdog-3.0.0.tar.gz", hash = "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "81fd3c5daa1d551b619488776b7d186ccd4e0d9a0638ecf2dc65efd7c129fc45" +content-hash = "c8bffe50f97eac9eb940805add5c23e9711e708ef880fdf63d9fc3dfb58e4ad4" From 2f90b6b40f3906a7c30545c2e10337f09e512de4 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:15:31 +0200 Subject: [PATCH 16/60] updated deps --- docker/lwua-ingest/lwua-py/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/lwua-ingest/lwua-py/pyproject.toml b/docker/lwua-ingest/lwua-py/pyproject.toml index 98e482d..4fa77c9 100644 --- a/docker/lwua-ingest/lwua-py/pyproject.toml +++ b/docker/lwua-ingest/lwua-py/pyproject.toml @@ -11,6 +11,7 @@ apscheduler = "^3.10.4" pyaml = "^23.9.3" python-dotenv = "^1.0.0" sparqlwrapper = "^2.0.0" +watchdog = "^3.0.0" [build-system] From 0983c1df61ab425f817fbb370ab0b410a8909cfe Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:15:58 +0200 Subject: [PATCH 17/60] ensure the log folder exists --- logging/.gitplaceholder | 1 + 1 file changed, 1 insertion(+) create mode 100644 logging/.gitplaceholder diff --git a/logging/.gitplaceholder b/logging/.gitplaceholder new file mode 100644 index 0000000..e025063 --- /dev/null +++ b/logging/.gitplaceholder @@ -0,0 +1 @@ +# ensuring this folder exists \ No newline at end of file From 846ec54561c91234aaa56939782479408b4ddd80 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:16:58 +0200 Subject: [PATCH 18/60] fix path to data - as it is distinct to the location inside the grpahdb container --- docker/lwua-ingest/lwua-py/lwua/ingest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 226a224..c8f5dca 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -4,6 +4,7 @@ from rdflib import Graph import os from dotenv import load_dotenv +from .helpers import enable_logging log = logging.getLogger(__name__) @@ -22,7 +23,7 @@ def run_ingest(): def data_path_from_config(): - folder_name = os.getenv("GDB_DATA_FOLDER", "/root/graphdb-import/data") + folder_name = os.getenv("INGEST_DATA_FOLDER", "/data") return Path(folder_name).absolute() @@ -105,6 +106,7 @@ def ingest_data_file(fname): # directly connecting to a localhost graphdb endpoint (which might be inside docker!) 
def main(): load_dotenv() + enable_logging() ingest_data_file("project.ttl") # todo # run_ingest() From c31846c0b75bf4e479151d5ab87941a929c69e5f Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:17:23 +0200 Subject: [PATCH 19/60] use the new external logging/ folder --- docker/lwua-ingest/lwua-py/debug-logconf.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/lwua-ingest/lwua-py/debug-logconf.yml b/docker/lwua-ingest/lwua-py/debug-logconf.yml index 9884ee3..df2a0a0 100644 --- a/docker/lwua-ingest/lwua-py/debug-logconf.yml +++ b/docker/lwua-ingest/lwua-py/debug-logconf.yml @@ -15,7 +15,7 @@ handlers: class: logging.FileHandler level: DEBUG formatter: base - filename: lwua-ingest-debug.log + filename: logging/lwua-ingest-debug.log mode: 'a' # in stead of 'w' loggers: __main__: From 892abd6fc3990489ff05d0466219db1e85808705 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 12:17:43 +0200 Subject: [PATCH 20/60] extended readme --- README.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 7d4c393..2024ae1 100644 --- a/README.md +++ b/README.md @@ -3,30 +3,57 @@ ## using this project Steps: + 1. retrieve the source code from github -2. to start up the services simply run + +2. to build the services simply run ```bash .$ touch .env # make sure you have an .env file -.$ cd docker && docker-compose up # use docker to run the services +.$ cd docker && docker-compose build # use docker to build the services ``` -3. open the jupyter notebook + +3. to start up the services simply run + +```bash +.$ cd docker && docker-compose up # use docker to run the services +``` + +4. open the jupyter notebook ```bash .$ xdg-open $(docker/jupyter_url.sh) # this gets the url for the service and opens a browser to it ``` +5. open the graphdb browser ui + +```bash +.$ xdg-open http://localhost:7200 # opens the web ui in a browser +``` + +6. run a test-ingest + +This introduces forcefully at least the data/project.ttl into the triple store +This should not be needed when the ingest runs automatically + +```bash +.$ docker exec -it lwua_ingest /bin/bash # interactively gets you into the ingest env +root@f226b253fbd4:/lwua-py# python -m lwua.ingest # run the ingest +``` -## general plan + +## general plan ahead -- details to be converted into github issues big idea is to have a central triples store for the user analysis approach this to decouple the ingest (retrieval and semantic mapping) from the different sources from the reporting (which should be based on the assembled knowledge graph) -### for the ingest we will need a mix if strategies -* actually getting data by using dumps our webservices -* additionally uplifting thos to triples (via pysubyt) +### for the ingest we will need a mix of strategies +* actually getting raw (non linkd) data by using dumps from webservices +* additionally uplifting those to triples (via pysubyt) * possibly ingesting long-living reference sets through ldes client - +* augmenting strategies --> starting by reading from what we already have in store, decide, then fetch more connected data, and produce more triples +* possibly add semantic reasoner +* attention to provenance triples for meta analysis ? 
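As a concrete sketch of the lastmod-based sync described in `run_ingest`'s TODO notes: the context naming mirrors `fname_2_context`, while the `graph_lastmod` map (assumed to be queried beforehand from the admin graph) is hypothetical:

```python
from pathlib import Path

def find_updated_files(data_path: Path, graph_lastmod: dict[str, float]) -> list[Path]:
    """List data files whose on-disk mtime is newer than the lastmod
    recorded for their corresponding named graph (sketch only)."""
    updated = []
    for fpath in data_path.glob("**/*"):
        if not fpath.is_file():
            continue
        # mirrors fname_2_context() from lwua/ingest.py
        context = f"urn:lwua:INGEST:data/{fpath.name}"
        if fpath.stat().st_mtime > graph_lastmod.get(context, 0.0):
            updated.append(fpath)
    return updated
```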
#### Ingest Tasks - identify sources (dumps, werbservices or sparql endpoints) @@ -48,7 +75,6 @@ this to decouple the ingest (retrieval and semantic mapping) from the different - build ipynb reports - ### model-design * identify the shape of the graph we will use and how all items will be linked together * source for uplifting and querying @@ -75,9 +101,23 @@ this to decouple the ingest (retrieval and semantic mapping) from the different - deploy at docker-dev - setup ci/cd for autodeploy +### meta & wrap up +#### release management +- to be setup +- to consider split between reusable platform of components for generic semantic analysis & lwua23 +- to organise multiple repos +- to publish images on docker-hub? elsewhere? -## repo layout +#### documentation +- todo / make lists +- probably organize into separate /docs/**md linked from this readme ? + + + +## documentation + +### repo layout src / py / lwua_ingest --> module for ingest, has nested ./lwua_ingest/ and ./tests/ @@ -94,3 +134,5 @@ docker / tools --> useful bash scripts to do some standard docker commands (as a docs / **.md --> with useful planning / motivation / usage / etc etc docs (e.g. list-of-sources.md) data / {source} / **.* out of band retrieved actual files + +logging / ** placeholder folder where dedicated logging from different docker-containers are grouped and put together. From 907d87d02b2e3419de02c66388326ea96407461e Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Thu, 26 Oct 2023 16:59:31 +0200 Subject: [PATCH 21/60] room for more dependencies in ipynb context --- docker/jupyter/Dockerfile | 8 ++++---- docker/jupyter/requirements.txt | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 docker/jupyter/requirements.txt diff --git a/docker/jupyter/Dockerfile b/docker/jupyter/Dockerfile index 7c73561..b5b99c0 100644 --- a/docker/jupyter/Dockerfile +++ b/docker/jupyter/Dockerfile @@ -1,6 +1,6 @@ FROM jupyter/base-notebook - -RUN pip install pykg2tbl - VOLUME /notebooks -WORKDIR /notebooks \ No newline at end of file +WORKDIR /notebooks + +COPY ./requirements.txt /requirements.txt +RUN pip install -r /requirements.txt diff --git a/docker/jupyter/requirements.txt b/docker/jupyter/requirements.txt new file mode 100644 index 0000000..a106286 --- /dev/null +++ b/docker/jupyter/requirements.txt @@ -0,0 +1 @@ +pykg2tbl From 523df6e02dd9dad9f7156b14708c17914fed3784 Mon Sep 17 00:00:00 2001 From: Marc Portier Date: Tue, 14 Nov 2023 12:11:50 +0100 Subject: [PATCH 22/60] as is current dump of progress towards autodetection --- README.md | 2 +- docker/docker-compose.yml | 36 +-- .../jupyter/notebooks/01-basic-sparql.ipynb | 226 +++++++++++++++++- .../queries/instances-of-type.sparql | 27 +++ .../jupyter/notebooks/queries/person.sparql | 19 ++ .../jupyter/notebooks/queries/project.sparql | 19 ++ docker/lwua-ingest/Dockerfile | 8 +- docker/lwua-ingest/lwua-py/logging | 1 + docker/lwua-ingest/lwua-py/lwua/ingest.py | 41 ++-- .../lwua-ingest/lwua-py/lwua/tryout-watch.py | 55 +++++ dotenv-example | 5 + 11 files changed, 398 insertions(+), 41 deletions(-) create mode 100644 docker/jupyter/notebooks/queries/instances-of-type.sparql create mode 100644 docker/jupyter/notebooks/queries/person.sparql create mode 100644 docker/jupyter/notebooks/queries/project.sparql create mode 120000 docker/lwua-ingest/lwua-py/logging create mode 100644 docker/lwua-ingest/lwua-py/lwua/tryout-watch.py create mode 100644 dotenv-example diff --git a/README.md b/README.md index 2024ae1..9791103 100644 --- a/README.md +++ b/README.md @@ 
-9,7 +9,7 @@ Steps: 2. to build the services simply run ```bash -.$ touch .env # make sure you have an .env file +.$ cp dotenv-example .env # make sure you have an .env file .$ cd docker && docker-compose build # use docker to build the services ``` diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index e56ff38..646d386 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,6 +1,20 @@ version: "3.7" # ----------------------------------------------------------------------- services: + graphdb: + build: + context: ./graphdb + image: lwua/lwua_graphdb + container_name: lwua_graphdb + ports: + - 7200:7200 # HTTP + volumes: + - ../data:/root/graphdb-import/data + # todo - think about volumes for persistence of data ? + labels: + be.vliz.container.project: "LWUA" + be.vliz.container.group: "services" + jupyter: build: context: ./jupyter @@ -12,21 +26,10 @@ services: - graphdb volumes: - "./jupyter/notebooks:/notebooks" - labels: - be.vliz.container.project: "LWUA" - be.vliz.container.group: "services" - - graphdb: - # todo consider a local build that also initializes a repository for lwua inside this lwua_graphdb! - build: - context: ./graphdb - image: lwua/lwua_graphdb - container_name: lwua_graphdb - ports: - - 7200:7200 # HTTP - volumes: - - ../data:/root/graphdb-import/data - # todo - think about volumes for persistence of data + env_file: + - ../.env + environment: + - GDB_BASE=http://graphdb:7200/ labels: be.vliz.container.project: "LWUA" be.vliz.container.group: "services" @@ -42,6 +45,9 @@ services: - ../logging:/logging # Store for any input data env_file: - ../.env + environment: + - GDB_BASE=http://graphdb:7200/ + - INGEST_DATA_FOLDER=/data # for test / dev -- no restart and single run restart: "no" command: run diff --git a/docker/jupyter/notebooks/01-basic-sparql.ipynb b/docker/jupyter/notebooks/01-basic-sparql.ipynb index 1f19aee..b4db079 100644 --- a/docker/jupyter/notebooks/01-basic-sparql.ipynb +++ b/docker/jupyter/notebooks/01-basic-sparql.ipynb @@ -2,12 +2,228 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "id": "0586dd76-16e3-4da3-944b-7b4c22496f8a", - "metadata": {}, + "execution_count": 2, + "id": "1a16d4cc-ab43-4394-ad1d-80ac0e1745b0", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "outputs": [], "source": [ - "from pykg2tbl import KGSource" + "from pykg2tbl import DefaultSparqlBuilder, KGSource, QueryResult\n", + "from pathlib import Path\n", + "from pandas import DataFrame\n", + "import os\n", + "\n", + "\n", + "# SPARQL EndPoint to use - wrapped as Knowledge-Graph 'source'\n", + "GDB_BASE:str = os.getenv(\"GDB_BASE\", \"http://localhost:7200/\")\n", + "GDB_REPO: str = os.getenv(\"GDB_REPO\", \"lwua23\")\n", + "GDB_ENDPOINT: str = f\"{GDB_BASE}repositories/{GDB_REPO}\"\n", + "GDB: KGSource = KGSource.build(GDB_ENDPOINT)\n", + "\n", + "TEMPLATES_FOLDER = str(Path().absolute() / \"queries\")\n", + "GENERATOR = DefaultSparqlBuilder(templates_folder=TEMPLATES_FOLDER)\n", + "\n", + "\n", + "def generate_sparql(name: str, **vars) -> str: \n", + " \"\"\" Simply build the sparql by using the named query and applying the vars\n", + " \"\"\"\n", + " return GENERATOR.build_syntax(name, **vars)\n", + "\n", + "\n", + "def execute_to_df(name: str, **vars) -> DataFrame:\n", + " \"\"\" Builds the sparql and executes, returning the result as a dataframe.\n", + " \"\"\"\n", + " sparql = generate_sparql(name, **vars)\n", + " result: QueryResult = GDB.query(sparql=sparql)\n", + " return result.to_dataframe()" + ] + }, + { + 
"cell_type": "markdown", + "id": "5164a2eb-4ed9-40a8-a111-2dea39d5f925", + "metadata": {}, + "source": [ + "# basic entity listing" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a80add69-2f69-4f4c-b9ff-4e40f77cb0cf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[stripped HTML render of the dataframe: columns "project", "name"; the same content appears in the text/plain output below]
" + ], + "text/plain": [ + " project name\n", + "0 http://example.com/lwua23 Lifewatch User Analysis 2023" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execute_to_df(\"project.sparql\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8162b28d-1a28-4346-884e-7341d83841ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[stripped HTML render of the dataframe: columns "person", "name"; the same content appears in the text/plain output below]
" + ], + "text/plain": [ + " person name\n", + "0 https://orcid.org/0000-0002-9648-6484 Marc Portier\n", + "1 https://orcid.org/0000-0003-0663-5907 Laurian van Maldghem" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "execute_to_df(\"person.sparql\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5b5e7448-d723-46fc-818b-2ec41306f471", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[stripped HTML render of the dataframe: columns "instance", "label"; the same content appears in the text/plain output below]
" + ], + "text/plain": [ + " instance label\n", + "0 https://orcid.org/0000-0002-9648-6484 Marc Portier\n", + "1 https://orcid.org/0000-0003-0663-5907 Laurian van Maldghem" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "findPerson=dict(type=\"https://schema.org/Person\")\n", + "findProject=dict(type=\"https://schema.org/Project\")\n", + "execute_to_df(\"instances-of-type.sparql\", **findPerson) " ] } ], @@ -27,7 +243,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docker/jupyter/notebooks/queries/instances-of-type.sparql b/docker/jupyter/notebooks/queries/instances-of-type.sparql new file mode 100644 index 0000000..41a599b --- /dev/null +++ b/docker/jupyter/notebooks/queries/instances-of-type.sparql @@ -0,0 +1,27 @@ +{#- Jinja Template for Sparql Query + | Lists instances of certain type with their (optional) schema:name, skos:prefLabel, dc:title or rdfs:label + | variables in template: + | - type: URI of the type to list + | - (optional) N: number of results you want (defaults to no limits --> full listing) + | - (optional) O: offset to apply when paging + -#} +PREFIX dc: +PREFIX rdf: +PREFIX rdfs: +PREFIX schema: +PREFIX skos: + +SELECT + (?s as ?instance) + ?label +WHERE { + BIND (<{{ type }}> as ?type) + ?s a ?type . + OPTIONAL { ?s schema:name ?sorgname .} + OPTIONAL { ?s dc:title ?dctitle .} + OPTIONAL { ?s rdfs:label ?rdfslbl .} + OPTIONAL { ?s skos:prefLabel ?skoslbl .} + BIND(coalesce( ?sorgname, ?dctitle, ?rdfslbl, ?skoslbl, "«no name found»") as ?label) # coalesce picks first one that is available +} +{{ "OFFSET " ~ O if O else "" }} {#- add offset statement if provided #} +{{ "LIMIT " ~ N if N else "" }} {#- add limit statement if provided #} \ No newline at end of file diff --git a/docker/jupyter/notebooks/queries/person.sparql b/docker/jupyter/notebooks/queries/person.sparql new file mode 100644 index 0000000..4b0414e --- /dev/null +++ b/docker/jupyter/notebooks/queries/person.sparql @@ -0,0 +1,19 @@ +{#- Jinja Template for Sparql Query + | Lists instances of certain type with their (optional) schema:name, skos:prefLabel, dc:title or rdf:?? + | variables in template: + | - (optional) N: number of results you want (defaults to no limits --> full listing) + | - (optional) O: offset to apply when paging + -#} +PREFIX schema: + +SELECT + (?s as ?person) + ?name +WHERE { + ?s a schema:Person . + OPTIONAL { + ?s schema:name ?name + } +} +{{ "OFFSET " ~ O if O else "" }} {#- add offset statement if provided #} +{{ "LIMIT " ~ N if N else "" }} {#- add limit statement if provided #} \ No newline at end of file diff --git a/docker/jupyter/notebooks/queries/project.sparql b/docker/jupyter/notebooks/queries/project.sparql new file mode 100644 index 0000000..aac4912 --- /dev/null +++ b/docker/jupyter/notebooks/queries/project.sparql @@ -0,0 +1,19 @@ +{#- Jinja Template for Sparql Query + | Lists schema:Project instances with their (optional) schema:name + | variables in template: + | - (optional) N: number of results you want (defaults to no limits --> full listing) + | - (optional) O: offset to apply when paging + -#} +PREFIX schema: + +SELECT + (?s as ?project) + ?name +WHERE { + ?s a schema:Project . + OPTIONAL { + ?s schema:name ?name . 
+ } +} +{{ "OFFSET " ~ O if O else "" }} {#- add offset statement if provided #} +{{ "LIMIT " ~ N if N else "" }} {#- add limit statement if provided #} \ No newline at end of file diff --git a/docker/lwua-ingest/Dockerfile b/docker/lwua-ingest/Dockerfile index e77f3de..1a59c3d 100644 --- a/docker/lwua-ingest/Dockerfile +++ b/docker/lwua-ingest/Dockerfile @@ -1,6 +1,8 @@ FROM python:3.10-slim as python -ENV PYTHONUNBUFFERED=true +VOLUME /data +VOLUME /logging WORKDIR /lwua-py +ENV PYTHONUNBUFFERED=true # check for inspiration on including poetry # - https://hub.docker.com/r/airdock/python-poetry @@ -32,7 +34,5 @@ ENV PATH="/lwua-py/.venv/bin:$PATH" # ensure we have the build folder from the poetry stage of this image COPY --from=poetry /lwua-py /lwua-py -VOLUME /data -VOLUME /logging -RUN ln -s /logging /lwua-py/logging +RUN rm /lwua-py/logging && ln -s /logging /lwua-py/logging ENTRYPOINT ["python", "-m", "lwua"] diff --git a/docker/lwua-ingest/lwua-py/logging b/docker/lwua-ingest/lwua-py/logging new file mode 120000 index 0000000..42ff418 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/logging @@ -0,0 +1 @@ +../../../logging/ \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index c8f5dca..9232d1a 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -4,10 +4,11 @@ from rdflib import Graph import os from dotenv import load_dotenv -from .helpers import enable_logging +from .helpers import enable_logging, resolve_path log = logging.getLogger(__name__) +URN_BASE = "urn:lwua:INGEST" def run_ingest(): @@ -23,7 +24,8 @@ def run_ingest(): def data_path_from_config(): - folder_name = os.getenv("INGEST_DATA_FOLDER", "/data") + local_default = str(resolve_path("./data", versus="dotenv")) + folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) return Path(folder_name).absolute() @@ -46,20 +48,20 @@ def gdb_from_config(): return gdb -def ingest_graph(graph, gname: str = None, replace: bool = False): - log.debug(f"to insert data into <{ gname }>") +def ingest_graph(graph: Graph, context: str = None, replace: bool = False): + log.debug(f"to insert data into <{ context }>") gdb = gdb_from_config() # do the cleanup if possible - if replace and gname is not None: + if replace and context is not None: pass # TODO execute delete of full graph -- have to check syntax # extract the triples and format the insert statement ntstr = graph.serialize(format="nt") log.debug(f"extracted tiples == { ntstr }") - if gname is not None: - inserts = f"INSERT DATA {{ GRAPH <{ gname }> {{ { ntstr } }} }}" + if context is not None: + inserts = f"INSERT DATA {{ GRAPH <{ context }> {{ { ntstr } }} }}" else: inserts = f"INSERT DATA {{ { ntstr} }}" log.debug(f"INSERT of ==> { inserts }") @@ -73,8 +75,16 @@ def ingest_graph(graph, gname: str = None, replace: bool = False): gdb.query() -def fname_2_gname(fname): - return f"urn:lwua:data/{fname}" # TODO maybe consider something else? +def named_context(name: str, base: str = URN_BASE): + return f"{base}:{name}" # TODO maybe consider something else? 
+ + +def fname_2_context(fname: str): + return named_context(f"data/{fname}") + + +def admin_context(): + return named_context("ADMIN") def suffix_2_format(suffix): @@ -88,7 +98,7 @@ def suffix_2_format(suffix): def read_graph(fpath: Path, format: str = None): format = format or suffix_2_format(fpath.suffix) - graph = Graph().parse(location=str(fpath), format=format) + graph: Graph = Graph().parse(location=str(fpath), format=format) return graph @@ -96,20 +106,19 @@ def ingest_data_file(fname): file_path = data_path_from_config() / fname assert file_path.exists(), f"cannot ingest file at {file_path}" - gname = fname_2_gname(fname) - graph = read_graph(file_path) + context = fname_2_context(fname) + graph = read_graph(file_path) # TODO capture lastmodified of this file too - ingest_graph(graph, gname=gname) + ingest_graph(graph, context=context) + # TODO maintain metadata triples last-ingest / last-modified of ingested file in some admin graph context # Note: this main method allows to locally test outside docker -# directly connecting to a localhost graphdb endpoint (which might be inside docker!) +# directly connecting to a localhost graphdb endpoint (which might be inside docker itself) def main(): load_dotenv() enable_logging() ingest_data_file("project.ttl") - # todo - # run_ingest() if __name__ == '__main__': diff --git a/docker/lwua-ingest/lwua-py/lwua/tryout-watch.py b/docker/lwua-ingest/lwua-py/lwua/tryout-watch.py new file mode 100644 index 0000000..d127644 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/tryout-watch.py @@ -0,0 +1,55 @@ +import time +import os +from dotenv import load_dotenv +from watchdog.observers import Observer +from watchdog.events import FileSystemEventHandler +import logging +from lwua.helpers import enable_logging, resolve_path + +log = logging.getLogger(__name__) + + +class Watcher: + def __init__(self, folder_to_watch): + self.observer = Observer() + self._folder_to_watch = folder_to_watch + + def run(self): + event_handler = Handler() + self.observer.schedule(event_handler, self._folder_to_watch, recursive=True) + log.info(f"observer started @{ self._folder_to_watch }") + self.observer.start() + try: + while True: + time.sleep(5) + except Exception as e: + self.observer.stop() + log.exception("observer stopped", e) + + log.info("ended observer-loop") + self.observer.join() + + +class Handler(FileSystemEventHandler): + + @staticmethod + def on_any_event(event): + if event.is_directory: + return None + + elif event.event_type == 'created': + # Take any action here when a file is first created. + log.info(f"Received created event - { event.src_path }.") + + elif event.event_type == 'modified': + # Taken any action here when a file is modified. 
+ log.info(f"Received modified event - { event.src_path }.") + + +if __name__ == '__main__': + load_dotenv() + enable_logging() + file_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", "/root/graphdb-import/data"), "dotenv").absolute() + log.info(f"env pointing to { file_to_watch }") + w = Watcher(file_to_watch) + w.run() diff --git a/dotenv-example b/dotenv-example new file mode 100644 index 0000000..cd0c225 --- /dev/null +++ b/dotenv-example @@ -0,0 +1,5 @@ +# general logconf setting +LOGCONF=debug-logconf.yml + +# general repo setting +GDB_REPO="lwua23" From 7eb3d960f7c01f0829ab38034f36c18dd01acd71 Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Tue, 14 Nov 2023 15:17:47 +0100 Subject: [PATCH 23/60] normalise dos2unix for /docker/**/*.sh files --- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..37725a4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +docker/**/*.sh text eol=lf \ No newline at end of file From 8b93eb1f1bda7309b0050f1c0b1ab592bdf9115b Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Tue, 14 Nov 2023 16:21:09 +0100 Subject: [PATCH 24/60] added watcher to injest --- docker/lwua-ingest/lwua-py/lwua/ingest.py | 22 ++++++++++- .../lwua/{tryout-watch.py => tryout_watch.py} | 38 +++++++++---------- 2 files changed, 37 insertions(+), 23 deletions(-) rename docker/lwua-ingest/lwua-py/lwua/{tryout-watch.py => tryout_watch.py} (50%) diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 9232d1a..ab6f9eb 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -5,6 +5,7 @@ import os from dotenv import load_dotenv from .helpers import enable_logging, resolve_path +from .tryout_watch import Watcher, Handler log = logging.getLogger(__name__) @@ -14,6 +15,11 @@ def run_ingest(): data_path = data_path_from_config() log.info(f"run_ingest on updated files in {data_path}") + #init watcher on data_path + w = Watcher(data_path) + w.run() + + # TODO -- immplement steps # list all the contents (files) in data_path together with last mod # get the <#admin-luwa-ingest> graph listing the maintained named-graphs and their lastmod @@ -55,8 +61,8 @@ def ingest_graph(graph: Graph, context: str = None, replace: bool = False): # do the cleanup if possible if replace and context is not None: - pass # TODO execute delete of full graph -- have to check syntax - + delete_graph(gdb, context) + # extract the triples and format the insert statement ntstr = graph.serialize(format="nt") log.debug(f"extracted tiples == { ntstr }") @@ -86,6 +92,18 @@ def fname_2_context(fname: str): def admin_context(): return named_context("ADMIN") +def delete_all_graphs(gdb): + deletes = f"DELETE WHERE {{ GRAPH ?g {{ ?s ?p ?o }} }}" + gdb.setQuery(deletes) + gdb.queryType = 'DELETE' + gdb.query() + +def delete_graph(gbd, context: str): + deletes = f"DELETE WHERE {{ GRAPH <{ context }> {{ ?s ?p ?o }} }}" + gdb.setQuery(deletes) + gdb.queryType = 'DELETE' + gdb.query() + def suffix_2_format(suffix): if suffix in ["ttl", "turtle"]: diff --git a/docker/lwua-ingest/lwua-py/lwua/tryout-watch.py b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py similarity index 50% rename from docker/lwua-ingest/lwua-py/lwua/tryout-watch.py rename to docker/lwua-ingest/lwua-py/lwua/tryout_watch.py index d127644..39c2a92 100644 --- 
a/docker/lwua-ingest/lwua-py/lwua/tryout-watch.py +++ b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py @@ -8,44 +8,39 @@ log = logging.getLogger(__name__) - class Watcher: - def __init__(self, folder_to_watch): + def __init__(self, directory_to_watch): self.observer = Observer() - self._folder_to_watch = folder_to_watch + self.directory_to_watch = directory_to_watch def run(self): event_handler = Handler() - self.observer.schedule(event_handler, self._folder_to_watch, recursive=True) - log.info(f"observer started @{ self._folder_to_watch }") + self.observer.schedule(event_handler, self.directory_to_watch, recursive=True) self.observer.start() try: while True: + log.debug(f"observer loop {self.directory_to_watch}") time.sleep(5) - except Exception as e: + except KeyboardInterrupt: + log.info("Stopping watcher") self.observer.stop() - log.exception("observer stopped", e) - - log.info("ended observer-loop") + log.info("Stopping observer") self.observer.join() - class Handler(FileSystemEventHandler): + def on_modified(self, event): + log.info(f"File {event.src_path} has been modified") - @staticmethod - def on_any_event(event): - if event.is_directory: - return None - - elif event.event_type == 'created': - # Take any action here when a file is first created. - log.info(f"Received created event - { event.src_path }.") + def on_created(self, event): + log.info(f"File {event.src_path} has been created") - elif event.event_type == 'modified': - # Taken any action here when a file is modified. - log.info(f"Received modified event - { event.src_path }.") + def on_deleted(self, event): + log.info(f"File {event.src_path} has been deleted") + def on_moved(self, event): + log.info(f"File {event.src_path} has been moved to {event.dest_path}") +''' if __name__ == '__main__': load_dotenv() enable_logging() @@ -53,3 +48,4 @@ def on_any_event(event): log.info(f"env pointing to { file_to_watch }") w = Watcher(file_to_watch) w.run() +''' From 4378715cea71b977f7b52ee1976d159a424436bd Mon Sep 17 00:00:00 2001 From: cedricd Date: Thu, 16 Nov 2023 00:28:49 +0100 Subject: [PATCH 25/60] deleted non essential code fr starting graphdb-database --- docker/graphdb/Dockerfile | 2 +- docker/graphdb/initdb/init_graphdb.sh | 42 +-------------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/docker/graphdb/Dockerfile b/docker/graphdb/Dockerfile index d860f87..ffa8da6 100644 --- a/docker/graphdb/Dockerfile +++ b/docker/graphdb/Dockerfile @@ -4,4 +4,4 @@ RUN mkdir -p /root/graphdb-import/data WORKDIR /root/graphdb-import/data COPY ./initdb /initdb -RUN cd /initdb && ./init_graphdb.sh \ No newline at end of file +RUN cd /initdb && sh ./init_graphdb.sh \ No newline at end of file diff --git a/docker/graphdb/initdb/init_graphdb.sh b/docker/graphdb/initdb/init_graphdb.sh index 76c1989..e3878ed 100755 --- a/docker/graphdb/initdb/init_graphdb.sh +++ b/docker/graphdb/initdb/init_graphdb.sh @@ -1,44 +1,4 @@ -#!/bin/bash -ex - -GDBPIDF="/tmp/init-graphdb-serv.pid" -GDBOUTF="/tmp/init-graphdb-out.txt" - -start_graphdb(){ - rm -f ${GDBPIDF} - graphdb -s -p ${GDBPIDF} >${GDBOUTF} 2>&1 & - sleep 1 -} - -wait_graphdb(){ - count=0 - while ! 
nc -z localhost 7200; do - count=$((count+1)) - if [ $count -gt 1000 ]; then - return - fi - # else - sleep 0.1 # wait for 1/10 of the second before check again - done -} - -stop_graphdb(){ - kill -9 $(cat ${GDBPIDF}) - sleep 1 - rm -f ${GDBPIDF} - rm -f ${GDBOUTF} -} - -createdb() { - curl -X POST http://localhost:7200/rest/repositories -H 'Content-Type: multipart/form-data' -F config=@lwua23-repo.ttl -} - - -# one could do it like this -#start_graphdb -#wait_graphdb -#createdb -#wait_configdb -#stop_graphdb +#!/bin/bash # but actually this just works too: REPODIR="/opt/graphdb/home/data/repositories/lwua23" From e41d322a099b0adbffebb0420d6da0aeeadaa4ca Mon Sep 17 00:00:00 2001 From: cedricd Date: Thu, 16 Nov 2023 12:41:32 +0100 Subject: [PATCH 26/60] watcher works, iri injest error on graph modifications though --- data/test.ttl | 0 .../lwua-py/lwua/graph_functions.py | 107 ++++++++++++++++++ docker/lwua-ingest/lwua-py/lwua/ingest.py | 106 +---------------- .../lwua-ingest/lwua-py/lwua/tryout_watch.py | 92 ++++++++++----- 4 files changed, 174 insertions(+), 131 deletions(-) create mode 100644 data/test.ttl create mode 100644 docker/lwua-ingest/lwua-py/lwua/graph_functions.py diff --git a/data/test.ttl b/data/test.ttl new file mode 100644 index 0000000..e69de29 diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py new file mode 100644 index 0000000..ebee753 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py @@ -0,0 +1,107 @@ +# this file will be used to trigger the injest processes into graphdb +# this prevents the injest.py and tryout_watch.py to be run at the same time with conflicting circular imports + +from SPARQLWrapper import SPARQLWrapper, JSON +from pathlib import Path +import logging +from rdflib import Graph +import os +from dotenv import load_dotenv +from .helpers import enable_logging, resolve_path + +log = logging.getLogger(__name__) +URN_BASE = "urn:lwua:INGEST" + +def data_path_from_config(): + local_default = str(resolve_path("./data", versus="dotenv")) + folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) + return Path(folder_name).absolute() + +def gdb_from_config(): + base = os.getenv("GDB_BASE", "http://localhost:7200") + repoid = os.getenv("GDB_REPO", "lwua23") + + endpoint = f"{ base }/repositories/{ repoid }" + updateEndpoint = endpoint + "/statements" # update statements are handled at other endpoint + + log.debug(f"using endpoint {endpoint}") + + gdb = SPARQLWrapper( + endpoint=endpoint, + updateEndpoint=updateEndpoint, + returnFormat=JSON, + agent="lwua-python-sparql-client" + ) + gdb.method = 'POST' + return gdb + +def ingest_graph(graph: Graph, context: str = None, replace_context: str = None): + log.debug(f"to insert data into <{ context }>") + + gdb = gdb_from_config() + + # do the cleanup if possible + if replace_context is not None and context is not None: + delete_graph(context, gdb) + + # extract the triples and format the insert statement + ntstr = graph.serialize(format="nt") + log.debug(f"extracted tiples == { ntstr }") + if context is not None: + inserts = f"INSERT DATA {{ GRAPH <{ context }> {{ { ntstr } }} }}" + else: + inserts = f"INSERT DATA {{ { ntstr} }}" + log.debug(f"INSERT of ==> { inserts }") + + gdb.setQuery(inserts) + log.debug(f"detected querytype == {gdb.queryType}") + + # unsure if this is needed -- can sqlwrapper detect this automatically? 
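+    # (SPARQLWrapper does try to guess the query type from the query string itself,
+    #  so this explicit assignment is mostly a safety net to force the updateEndpoint)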
+ gdb.queryType = 'INSERT' # important to indicate that the update Endpoint should be used + + gdb.query() + +def named_context(name: str, base: str = URN_BASE): + return f"{base}:{name}" # TODO maybe consider something else? + +def fname_2_context(fname: str): + return named_context(f"data/{fname}") + +def admin_context(): + return named_context("ADMIN") + +def delete_all_graphs(gdb): + deletes = f"DELETE WHERE {{ GRAPH ?g {{ ?s ?p ?o }} }}" + gdb.setQuery(deletes) + gdb.queryType = 'DELETE' + gdb.query() + +def delete_graph(context: str,gdb: SPARQLWrapper= None): + if gdb is None: + gdb = gdb_from_config() + deletes = f"DELETE WHERE {{ GRAPH <{ context }> {{ ?s ?p ?o }} }}" + gdb.setQuery(deletes) + gdb.queryType = 'DELETE' + gdb.query() + +def suffix_2_format(suffix): + if suffix in ["ttl", "turtle"]: + return "turtle" + if suffix in ["jsonld", "json"]: + return "json-ld" + # todo consider others if needed + return None + +def read_graph(fpath: Path, format: str = None): + format = format or suffix_2_format(fpath.suffix) + graph: Graph = Graph().parse(location=str(fpath), format=format) + return graph + +def ingest_data_file(fname, context: str = None, replace_context: str = None): + file_path = data_path_from_config() / fname + assert file_path.exists(), f"cannot ingest file at {file_path}" + + graph = read_graph(file_path) + + ingest_graph(graph, context=context, replace_context=replace_context) + # TODO maintain metadata triples last-ingest / last-modified of ingested file in some admin graph context diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index ab6f9eb..3c7ae2d 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -5,7 +5,8 @@ import os from dotenv import load_dotenv from .helpers import enable_logging, resolve_path -from .tryout_watch import Watcher, Handler +from .tryout_watch import Watcher +from .graph_functions import ingest_data_file, data_path_from_config log = logging.getLogger(__name__) @@ -19,7 +20,6 @@ def run_ingest(): w = Watcher(data_path) w.run() - # TODO -- immplement steps # list all the contents (files) in data_path together with last mod # get the <#admin-luwa-ingest> graph listing the maintained named-graphs and their lastmod @@ -29,107 +29,6 @@ def run_ingest(): # update the triple for the named-graph to lastmod in the admin grap -def data_path_from_config(): - local_default = str(resolve_path("./data", versus="dotenv")) - folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) - return Path(folder_name).absolute() - - -def gdb_from_config(): - base = os.getenv("GDB_BASE", "http://localhost:7200") - repoid = os.getenv("GDB_REPO", "lwua23") - - endpoint = f"{ base }/repositories/{ repoid }" - updateEndpoint = endpoint + "/statements" # update statements are handled at other endpoint - - log.debug(f"using endpoint {endpoint}") - - gdb = SPARQLWrapper( - endpoint=endpoint, - updateEndpoint=updateEndpoint, - returnFormat=JSON, - agent="lwua-python-sparql-client" - ) - gdb.method = 'POST' - return gdb - - -def ingest_graph(graph: Graph, context: str = None, replace: bool = False): - log.debug(f"to insert data into <{ context }>") - - gdb = gdb_from_config() - - # do the cleanup if possible - if replace and context is not None: - delete_graph(gdb, context) - - # extract the triples and format the insert statement - ntstr = graph.serialize(format="nt") - log.debug(f"extracted tiples == { ntstr }") - if context is not None: - inserts = f"INSERT DATA {{ 
GRAPH <{ context }> {{ { ntstr } }} }}" - else: - inserts = f"INSERT DATA {{ { ntstr} }}" - log.debug(f"INSERT of ==> { inserts }") - - gdb.setQuery(inserts) - log.debug(f"detected querytype == {gdb.queryType}") - - # unsure if this is needed -- can sqlwrapper detect this automatically? - gdb.queryType = 'INSERT' # important to indicate that the update Endpoint should be used - - gdb.query() - - -def named_context(name: str, base: str = URN_BASE): - return f"{base}:{name}" # TODO maybe consider something else? - - -def fname_2_context(fname: str): - return named_context(f"data/{fname}") - - -def admin_context(): - return named_context("ADMIN") - -def delete_all_graphs(gdb): - deletes = f"DELETE WHERE {{ GRAPH ?g {{ ?s ?p ?o }} }}" - gdb.setQuery(deletes) - gdb.queryType = 'DELETE' - gdb.query() - -def delete_graph(gbd, context: str): - deletes = f"DELETE WHERE {{ GRAPH <{ context }> {{ ?s ?p ?o }} }}" - gdb.setQuery(deletes) - gdb.queryType = 'DELETE' - gdb.query() - - -def suffix_2_format(suffix): - if suffix in ["ttl", "turtle"]: - return "turtle" - if suffix in ["jsonld", "json"]: - return "json-ld" - # todo consider others if needed - return None - - -def read_graph(fpath: Path, format: str = None): - format = format or suffix_2_format(fpath.suffix) - graph: Graph = Graph().parse(location=str(fpath), format=format) - return graph - - -def ingest_data_file(fname): - file_path = data_path_from_config() / fname - assert file_path.exists(), f"cannot ingest file at {file_path}" - - context = fname_2_context(fname) - graph = read_graph(file_path) # TODO capture lastmodified of this file too - - ingest_graph(graph, context=context) - # TODO maintain metadata triples last-ingest / last-modified of ingested file in some admin graph context - # Note: this main method allows to locally test outside docker # directly connecting to a localhost graphdb endpoint (which might be inside docker itself) @@ -138,6 +37,5 @@ def main(): enable_logging() ingest_data_file("project.ttl") - if __name__ == '__main__': main() diff --git a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py index 39c2a92..27b7cfd 100644 --- a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py +++ b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py @@ -1,46 +1,85 @@ import time import os +from pathlib import Path from dotenv import load_dotenv -from watchdog.observers import Observer -from watchdog.events import FileSystemEventHandler import logging from lwua.helpers import enable_logging, resolve_path +from lwua.graph_functions import delete_graph, ingest_data_file log = logging.getLogger(__name__) +### Cedric Decruw - 2023-11-16 ### +# ! watchdog was not used due to the fact that the docker container was not able to access the host filesystem +# ! 
this is a simple file watcher that will be used to trigger the ingest process +class FileStore: + def __init__(self, directory): + self.directory = directory + self.files = self.get_all_files() + self.last_files = None + + def get_all_files(self): + return {f: [os.path.getmtime(f)] for f in Path(self.directory).glob('**/*') if f.is_file()} + + def update(self): + new_files = self.get_all_files() + added = {f: new_files[f] for f in new_files if f not in self.files} + deleted = {f: self.files[f] for f in self.files if f not in new_files} + modified = {} + for f in new_files: + if f in self.files and new_files[f][0] != self.files[f][-1]: + if len(self.files[f]) == 2: + self.files[f].pop(0) + self.files[f].append(new_files[f][0]) + modified[f] = self.files[f] + self.last_files = self.files + self.files = new_files + return added, deleted, modified, self.last_files + class Watcher: def __init__(self, directory_to_watch): - self.observer = Observer() - self.directory_to_watch = directory_to_watch + self.directory_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", os.path.join("/root/graphdb-import",directory_to_watch)), "dotenv").absolute() + log.info(f"env pointing to { self.directory_to_watch }") + self.file_store = FileStore(self.directory_to_watch) def run(self): - event_handler = Handler() - self.observer.schedule(event_handler, self.directory_to_watch, recursive=True) - self.observer.start() try: while True: - log.debug(f"observer loop {self.directory_to_watch}") + if self.file_store.last_files is None: + log.info("First time loop") + #do stuff for first time loop here like checking for existinf context in graphdb + + log.info("Checking for updates") + added, deleted, modified, previous_filestore = self.file_store.update() + for f in added: + log.info(f"File {f} has been added at {time.ctime(added[f][0])}") + context = f"{f}_{format_time(added[f][0])}" + log.info(f"context to be used to add: {context}") + #ingest_data_file(f,context) + for f in deleted: + log.info(f"File {f} has been deleted") + #find the file in the previous filestore and use the last modified time to delete the graph + context = f"{f}_{format_time(previous_filestore[f][-1])}" + log.info(f"context to be used to delete: {context}") + #delete_graph(context) + for f in modified: + log.info(f"File {f} has been modified at {time.ctime(modified[f][-1])}, previous to last modified time: {time.ctime(modified[f][0]) if len(modified[f]) > 1 else 'N/A'}") + context_to_delete = f"{f}_{format_time(modified[f][0])}" + context_to_add = f"{f}_{format_time(modified[f][-1])}" + log.info(f"context to be used to delete: {context_to_delete}") + log.info(f"context to be used to add: {context_to_add}") + #ingest_data_file(f,context_to_add, context_to_delete) + time.sleep(5) except KeyboardInterrupt: - log.info("Stopping watcher") - self.observer.stop() - log.info("Stopping observer") - self.observer.join() - -class Handler(FileSystemEventHandler): - def on_modified(self, event): - log.info(f"File {event.src_path} has been modified") - - def on_created(self, event): - log.info(f"File {event.src_path} has been created") - - def on_deleted(self, event): - log.info(f"File {event.src_path} has been deleted") - - def on_moved(self, event): - log.info(f"File {event.src_path} has been moved to {event.dest_path}") + log.info("Stopping watcher") + +def format_time(epoch_time): + # Convert epoch time to struct_time object + time_obj = time.localtime(epoch_time) + # Format to return %Y_%m_%d_%H_%M_%S + return time.strftime("%Y_%m_%d_%H_%M_%S", time_obj) 
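+    # e.g. format_time(1700000000.0) -> "2023_11_14_23_13_20" (CET; result depends on the local timezone)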
-'''
+## test the watcher on local file system - not in docker
 if __name__ == '__main__':
     load_dotenv()
     enable_logging()
@@ -48,4 +87,3 @@ def on_moved(self, event):
     log.info(f"env pointing to { file_to_watch }")
     w = Watcher(file_to_watch)
     w.run()
-'''

From 4378715cea71b977f7b52ee1976d159a424436bd Mon Sep 17 00:00:00 2001
From: cedricd 
Date: Thu, 16 Nov 2023 00:28:49 +0100
Subject: [PATCH 27/60] working ingest, no auto

---
 data/test.ttl                                 |  0
 .../lwua-py/lwua/graph_functions.py           | 49 +++++++++++++++++--
 .../lwua-ingest/lwua-py/lwua/tryout_watch.py  |  6 +--
 project_2.ttl                                 | 32 ++++++++++++
 4 files changed, 81 insertions(+), 6 deletions(-)
 delete mode 100644 data/test.ttl
 create mode 100644 project_2.ttl

diff --git a/data/test.ttl b/data/test.ttl
deleted file mode 100644
index e69de29..0000000
diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py
index ebee753..6bb97c4 100644
--- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py
+++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 import logging
 from rdflib import Graph
+import time
 import os
 from dotenv import load_dotenv
 from .helpers import enable_logging, resolve_path
@@ -42,7 +43,8 @@ def ingest_graph(graph: Graph, context: str = None, replace_context: str = None)
 
     # do the cleanup if possible
     if replace_context is not None and context is not None:
-        delete_graph(context, gdb)
+        log.debug(f"deleting <{ replace_context }> before insert")
+        delete_graph(replace_context, gdb)
 
     # extract the triples and format the insert statement
     ntstr = graph.serialize(format="nt")
@@ -58,8 +60,10 @@ def ingest_graph(graph: Graph, context: str = None, replace_context: str = None)
     # unsure if this is needed -- can sqlwrapper detect this automatically?
     gdb.queryType = 'INSERT' # important to indicate that the update Endpoint should be used
-    gdb.query()
+    gdb.query()
+
+    # add the graph to the admin graph
+    add_graph_to_admin(context,gdb)
 
 def named_context(name: str, base: str = URN_BASE):
     return f"{base}:{name}"  # TODO maybe consider something else?
@@ -70,6 +74,35 @@ def fname_2_context(fname: str):
 def admin_context():
     return named_context("ADMIN")
 
+def add_graph_to_admin(context: str, gdb: SPARQLWrapper = None):
+    if URN_BASE not in context:
+        context = named_context(context)
+    if gdb is None:
+        gdb = gdb_from_config()
+    inserts = f"INSERT DATA {{ GRAPH <{ admin_context() }> {{ <{ context }> <https://schema.org/dateModified> \"{time.time()}\"^^<http://www.w3.org/2001/XMLSchema#double> }} }}"
+    gdb.setQuery(inserts)
+    gdb.queryType = 'INSERT'
+    gdb.query()
+
+def get_admin_graph(gdb: SPARQLWrapper = None):
+    if gdb is None:
+        gdb = gdb_from_config()
+    selects = f"SELECT ?g ?m WHERE {{ GRAPH <{ admin_context() }> {{ ?g <https://schema.org/dateModified> ?m }} }}"
+    gdb.setQuery(selects)
+    gdb.queryType = 'SELECT'
+    results = gdb.query().convert()
+    return results
+
+def delete_graph_from_admin(context: str, gdb: SPARQLWrapper = None):
+    if URN_BASE not in context:
+        context = named_context(context)
+    if gdb is None:
+        gdb = gdb_from_config()
+    deletes = f"DELETE WHERE {{ GRAPH <{ admin_context() }> {{ <{ context }> <https://schema.org/dateModified> ?m }} }}"
+    gdb.setQuery(deletes)
+    gdb.queryType = 'DELETE'
+    gdb.query()
+
 def delete_all_graphs(gdb):
     deletes = f"DELETE WHERE {{ GRAPH ?g {{ ?s ?p ?o }} }}"
     gdb.setQuery(deletes)
@@ -77,12 +110,18 @@ def delete_all_graphs(gdb):
     gdb.query()
 
 def delete_graph(context: str,gdb: SPARQLWrapper= None):
+    if URN_BASE not in context:
+        context = named_context(context)
     if gdb is None:
         gdb = gdb_from_config()
     deletes = f"DELETE WHERE {{ GRAPH <{ context }> {{ ?s ?p ?o }} }}"
     gdb.setQuery(deletes)
     gdb.queryType = 'DELETE'
     gdb.query()
+
+    #delete the graph from the admin graph
+    delete_graph_from_admin(context,gdb)
+
 
 def suffix_2_format(suffix):
     if suffix in ["ttl", "turtle"]:
@@ -102,6 +141,10 @@ def ingest_data_file(fname, context: str = None, replace_context: str = None):
     assert file_path.exists(), f"cannot ingest file at {file_path}"
 
     graph = read_graph(file_path)
+    iri_context = named_context(context)
+    iri_replace_context = named_context(replace_context) if replace_context is not None else None
+
+    log.info(f"ingesting {file_path} into {iri_context} replacing {iri_replace_context}")
 
-    ingest_graph(graph, context=context, replace_context=replace_context)
+    ingest_graph(graph, context=iri_context, replace_context=iri_replace_context)
     # TODO maintain metadata triples last-ingest / last-modified of ingested file in some admin graph context
diff --git a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py
index 27b7cfd..65ae244 100644
--- a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py
+++ b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py
@@ -54,20 +54,20 @@ def run(self):
             log.info(f"File {f} has been added at {time.ctime(added[f][0])}")
             context = f"{f}_{format_time(added[f][0])}"
             log.info(f"context to be used to add: {context}")
-            #ingest_data_file(f,context)
+            ingest_data_file(f,context)
         for f in deleted:
             log.info(f"File {f} has been deleted")
             #find the file in the previous filestore and use the last modified time to delete the graph
             context = f"{f}_{format_time(previous_filestore[f][-1])}"
             log.info(f"context to be used to delete: {context}")
-            #delete_graph(context)
+            delete_graph(context)
         for f in modified:
             log.info(f"File {f} has been modified at {time.ctime(modified[f][-1])}, previous to last modified time: {time.ctime(modified[f][0]) if len(modified[f]) > 1 else 'N/A'}")
             context_to_delete = f"{f}_{format_time(modified[f][0])}"
             context_to_add = f"{f}_{format_time(modified[f][-1])}"
             log.info(f"context to be used to delete: {context_to_delete}")
             log.info(f"context to be used to add: {context_to_add}")
-            #ingest_data_file(f,context_to_add, context_to_delete)
+            ingest_data_file(f,context_to_add, context_to_delete)
 
             time.sleep(5)
         except KeyboardInterrupt:
diff --git a/project_2.ttl b/project_2.ttl
new file mode 100644
index 0000000..5ff0f5a
--- /dev/null
+++ b/project_2.ttl
@@ -0,0 +1,32 @@
+@prefix schema: <https://schema.org/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix cc: <http://creativecommons.org/ns#> .
+
+
+#todo root this project somewhere decent -- maybe zenodo-github-doi ?
+ a schema:Project ;
+    schema:name "Lifewatch User Analysis 2023"^^xsd:string ;
+    schema:description "This project performs the 2023 Lifewatch User Analysis Reporting."^^xsd:string ;
+    schema:license ;
+    schema:accountablePerson ;
+    schema:contributor ;
+    schema:contributor ;
+    schema:creator .
+
+ a schema:Person ;
+    schema:name "Laurian van Maldghem"^^xsd:string .
+
+ a schema:Person ;
+    schema:name "Marc Portier"^^xsd:string .
+
+ a schema:Person ;
+    schema:name "Cedric Decruw"^^xsd:string .
+
+ a schema:Organization ;
+    schema:name "VLIZ vzw"^^xsd:string .
+
+ a cc:License ;
+    cc:legalcode ;
+    cc:permits cc:Reproduction, cc:Distribution, cc:DerivativeWorks ;
+    cc:requires cc:Notice ;
+    cc:prohibits cc:CCCommercialUse .
\ No newline at end of file

From 6a83d3e03f9fea967b950da78e9a322ed9afe71e Mon Sep 17 00:00:00 2001
From: cedricd 
Date: Thu, 16 Nov 2023 16:57:44 +0100
Subject: [PATCH 28/60] auto ingest complete

---
 project_2.ttl => data/project_2.ttl           |  2 +-
 .../lwua-py/lwua/graph_functions.py           |  9 +-
 .../lwua-ingest/lwua-py/lwua/tryout_watch.py  | 85 +++++++++++++++++--
 3 files changed, 84 insertions(+), 12 deletions(-)
 rename project_2.ttl => data/project_2.ttl (96%)

diff --git a/project_2.ttl b/data/project_2.ttl
similarity index 96%
rename from project_2.ttl
rename to data/project_2.ttl
index 5ff0f5a..b575c7f 100644
--- a/project_2.ttl
+++ b/data/project_2.ttl
@@ -14,7 +14,7 @@ schema:creator .
 
  a schema:Person ;
-    schema:name "Laurian van Maldghem"^^xsd:string .
+    schema:name "Laurian van Maldeghem"^^xsd:string .
 
  a schema:Person ;
     schema:name "Marc Portier"^^xsd:string .
diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py
index 6bb97c4..652f020 100644
--- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py
+++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py
@@ -87,12 +87,14 @@ def add_graph_to_admin(context: str, gdb: SPARQLWrapper = None):
 def get_admin_graph(gdb: SPARQLWrapper = None):
     if gdb is None:
         gdb = gdb_from_config()
-    selects = f"SELECT ?g ?m WHERE {{ GRAPH <{ admin_context() }> {{ ?g <https://schema.org/dateModified> ?m }} }}"
-    gdb.setQuery(selects)
-    gdb.queryType = 'SELECT'
+    #get full admin graph
+    query = f"SELECT ?g ?m WHERE {{ GRAPH <{ admin_context() }> {{ ?g <https://schema.org/dateModified> ?m }} }}"
+    gdb.setQuery(query)
+    gdb.setReturnFormat(JSON)
     results = gdb.query().convert()
     return results
 
+
 def delete_graph_from_admin(context: str, gdb: SPARQLWrapper = None):
     if URN_BASE not in context:
         context = named_context(context)
@@ -121,7 +123,6 @@ def delete_graph(context: str,gdb: SPARQLWrapper= None):
     gdb.query()
 
     #delete the graph from the admin graph
     delete_graph_from_admin(context,gdb)
-
 def suffix_2_format(suffix):
     if suffix in ["ttl", "turtle"]:
diff --git a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py
index 65ae244..54c760c 100644
--- a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py
+++ b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py
@@ -4,10 +4,12 @@
 from dotenv import load_dotenv
 import logging
 from lwua.helpers import enable_logging, resolve_path
-from lwua.graph_functions import delete_graph, ingest_data_file
+from lwua.graph_functions import delete_graph, ingest_data_file, get_admin_graph
 
 log = logging.getLogger(__name__)
 
+URN_BASE = "urn:lwua:INGEST" # !TODO: shouldn't this be in the env file?
+
 ### Cedric Decruw - 2023-11-16 ###
 # ! watchdog was not used due to the fact that the docker container was not able to access the host filesystem
 # ! this is a simple file watcher that will be used to trigger the ingest process
@@ -46,25 +48,91 @@ def run(self):
         try:
             while True:
                 if self.file_store.last_files is None:
                     log.info("First time loop")
-                    #do stuff for first time loop here like checking for existinf context in graphdb
+
+                    #try and get the graph , if this fails due to the graphdb server not being up yet, the watcher will try again in 5 seconds untill it succeeds
+                    get_admin = True
+                    while get_admin:
+                        try:
+                            admin_graph = get_admin_graph()
+                            log.info(f"admin graph: {admin_graph}")
+                            # admin graph looks like {'head': {'vars': ['g', 'm']}, 'results': {'bindings': [{'g': {'type': 'uri', 'value': 'urn:lwua:INGEST#admin-lwua-ingest'}, 'm': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime', 'type': 'literal', 'value': '2021-09-16T14:00:00Z'}}}]}}
+                            # we need to extract all the g.value's and put them in a list
+                            all_graphs_unformatted = [g['g']['value'] for g in admin_graph['results']['bindings']]
+                            #replace the URN_BASE with the empty string to get the context
+                            all_graphs = [g.replace(f"{URN_BASE}:","") for g in all_graphs_unformatted]
+
+                            #from this extract the name and last modified time of the graph by splitting on the last "_"
+                            info_admin_graph = []
+                            for g in all_graphs:
+                                info_g = {}
+                                #split the string on the last "_" to get the name and last modified time
+                                name, last_modified = g.rsplit("_",1)
+                                info_g['name'] = name
+                                info_g['last_modified'] = last_modified
+                                info_admin_graph.append(info_g)
+
+                            log.info(f"all graphs: {info_admin_graph}")
+                            get_admin = False
+
+                            #get all the files in the data folder recursively
+                            all_files = self.file_store.get_all_files()
+                            #compare the files to the admin graph
+                            # if filename is not in the admin graph, add it
+                            # if filename is in the admin graph, check if the last modified time is the same, if not update the graph
+                            # if filename is in the admin graph, but not in the files, delete the graph
+
+                            for f in all_files:
+                                #check if the file is in the admin graph
+                                if f not in all_graphs:
+                                    #add the file to the graph
+                                    context = f"{f}_{all_files[f][0]}"
+                                    log.info(f"context to be used to add: {context}")
+                                    ingest_data_file(f,context)
+                                else:
+                                    #check if the last modified time is the same
+                                    if all_files[f][0] != info_admin_graph[all_graphs.index(f)]['last_modified']:
+                                        #update the graph
+                                        context_to_delete = f"{f}_{info_admin_graph[all_graphs.index(f)]['last_modified']}"
+                                        context_to_add = f"{f}_{all_files[f][0]}"
+                                        log.info(f"context to be used to delete: {context_to_delete}")
+                                        log.info(f"context to be used to add: {context_to_add}")
+                                        ingest_data_file(f,context_to_add, context_to_delete)
+
+                            #check if there are any graphs in the admin graph that are not in the files
+                            for g in info_admin_graph:
+                                if g['name'] not in all_files:
+                                    log.info(f"graph {g['name']} not in files, deleting")
+                                    #delete the graph
+                                    context = f"{g['name']}_{g['last_modified']}"
+                                    log.info(f"context to be used to delete: {context}")
+                                    delete_graph(context)
+
+                        except:
+                            log.info("graphdb server not up yet, trying again in 1 second")
+                            time.sleep(1)
+                            continue
 
                 log.info("Checking for updates")
                 added, deleted, modified, previous_filestore = self.file_store.update()
                 for f in added:
                     log.info(f"File {f} has been added at {time.ctime(added[f][0])}")
-                    context = f"{f}_{format_time(added[f][0])}"
+                    #context = f"{f}_{format_time(added[f][0])}"
+                    context = f"{f}_{added[f][0]}"
                     log.info(f"context to be used to add: {context}")
                     ingest_data_file(f,context)
                 for f in deleted:
                     log.info(f"File {f} has been deleted")
                     #find the file in the previous filestore and use the last modified time to delete the graph
-                    context = f"{f}_{format_time(previous_filestore[f][-1])}"
+                    #context = f"{f}_{format_time(previous_filestore[f][-1])}"
+                    context = f"{f}_{previous_filestore[f][-1]}"
                     log.info(f"context to be used to delete: {context}")
                     delete_graph(context)
                 for f in modified:
                     log.info(f"File {f} has been modified at {time.ctime(modified[f][-1])}, previous to last modified time: {time.ctime(modified[f][0]) if len(modified[f]) > 1 else 'N/A'}")
-                    context_to_delete = f"{f}_{format_time(modified[f][0])}"
-                    context_to_add = f"{f}_{format_time(modified[f][-1])}"
+                    #context_to_delete = f"{f}_{format_time(modified[f][0])}"
+                    #context_to_add = f"{f}_{format_time(modified[f][-1])}"
+                    context_to_delete = f"{f}_{modified[f][0]}"
+                    context_to_add = f"{f}_{modified[f][-1]}"
                     log.info(f"context to be used to delete: {context_to_delete}")
                     log.info(f"context to be used to add: {context_to_add}")
                     ingest_data_file(f,context_to_add, context_to_delete)
@@ -72,12 +140,15 @@ def run(self):
                 time.sleep(5)
         except KeyboardInterrupt:
             log.info("Stopping watcher")
-
+
+#left out this part of the code since it is easier to just use the epoch time as the context , that way there is no need to reverse the process for the startup of the watcher
+'''
 def format_time(epoch_time):
     # Convert epoch time to struct_time object
     time_obj = time.localtime(epoch_time)
     # Format to return %Y_%m_%d_%H_%M_%S
     return time.strftime("%Y_%m_%d_%H_%M_%S", time_obj)
+'''
 
 ## test the watcher on local file system - not in docker
 if __name__ == '__main__':

From 4731fe7e16c8a0a7d15b51f28f22e63875222e18 Mon Sep 17 00:00:00 2001
From: cedricd 
Date: Fri, 17 Nov 2023 01:03:12 +0100
Subject: [PATCH 29/60] small refactoring

- deleted watchdog => not needed
- deleted project_2.ttl => not useful
---
 data/project.ttl                              |   3 +
 data/project_2.ttl                            |  32 -----
 .../lwua-py/lwua/graph_functions.py           |  73 +++++++---
 .../lwua-ingest/lwua-py/lwua/tryout_watch.py  | 128 ++++++------------
 docker/lwua-ingest/lwua-py/poetry.lock        |  58 +-------
 docker/lwua-ingest/lwua-py/pyproject.toml     |   3 +-
 6 files changed, 97 insertions(+), 200 deletions(-)
 delete mode 100644 data/project_2.ttl

diff --git a/data/project.ttl b/data/project.ttl
index bf11489..90aca3d 100644
--- a/data/project.ttl
+++ b/data/project.ttl
@@ -15,6 +15,9 @@
  a schema:Person ;
     schema:name "Laurian van Maldghem"^^xsd:string .
 
+ a schema:Person ;
+    schema:name "AlainProvist"^^xsd:string .
+
  a schema:Person ;
     schema:name "Marc Portier"^^xsd:string .
 
diff --git a/data/project_2.ttl b/data/project_2.ttl
deleted file mode 100644
index b575c7f..0000000
--- a/data/project_2.ttl
+++ /dev/null
@@ -1,32 +0,0 @@
-@prefix schema: <https://schema.org/> .
-@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
-@prefix cc: <http://creativecommons.org/ns#> .
-
-
-#todo root this project somewhere decent -- maybe zenodo-github-doi ?
- a schema:Project ;
-    schema:name "Lifewatch User Analysis 2023"^^xsd:string ;
-    schema:description "This project performs the 2023 Lifewatch User Analysis Reporting."^^xsd:string ;
-    schema:license ;
-    schema:accountablePerson ;
-    schema:contributor ;
-    schema:contributor ;
-    schema:creator .
-
- a schema:Person ;
-    schema:name "Laurian van Maldeghem"^^xsd:string .
-
- a schema:Person ;
-    schema:name "Marc Portier"^^xsd:string .
-
- a schema:Person ;
-    schema:name "Cedric Decruw"^^xsd:string .
-
- a schema:Organization ;
-    schema:name "VLIZ vzw"^^xsd:string .
-
- a cc:License ;
-    cc:legalcode ;
-    cc:permits cc:Reproduction, cc:Distribution, cc:DerivativeWorks ;
-    cc:requires cc:Notice ;
-    cc:prohibits cc:CCCommercialUse .
\ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py index 652f020..912dcf7 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py +++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py @@ -36,24 +36,24 @@ def gdb_from_config(): gdb.method = 'POST' return gdb -def ingest_graph(graph: Graph, context: str = None, replace_context: str = None): +def ingest_graph(graph: Graph, context: str = None, replace: bool = False): log.debug(f"to insert data into <{ context }>") gdb = gdb_from_config() # do the cleanup if possible - if replace_context is not None and context is not None: - log.debug(f"deleting <{ replace_context }> before insert") - delete_graph(replace_context, gdb) + if replace and context is not None: + log.debug(f"deleting <{ context }> before insert") + delete_graph(context, gdb) # extract the triples and format the insert statement ntstr = graph.serialize(format="nt") - log.debug(f"extracted tiples == { ntstr }") + #log.debug(f"extracted tiples == { ntstr }") # comented out because it is too verbose if context is not None: inserts = f"INSERT DATA {{ GRAPH <{ context }> {{ { ntstr } }} }}" else: inserts = f"INSERT DATA {{ { ntstr} }}" - log.debug(f"INSERT of ==> { inserts }") + #log.debug(f"INSERT of ==> { inserts }") gdb.setQuery(inserts) log.debug(f"detected querytype == {gdb.queryType}") @@ -61,46 +61,71 @@ def ingest_graph(graph: Graph, context: str = None, replace_context: str = None) # unsure if this is needed -- can sqlwrapper detect this automatically? gdb.queryType = 'INSERT' # important to indicate that the update Endpoint should be used gdb.query() - - # add the graph to the admin graph - add_graph_to_admin(context,gdb) + register_context_lastmod(context,gdb) def named_context(name: str, base: str = URN_BASE): return f"{base}:{name}" # TODO maybe consider something else? 
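+# e.g. named_context("ADMIN") -> "urn:lwua:INGEST:ADMIN", the admin graph IRI used below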
def fname_2_context(fname: str): - return named_context(f"data/{fname}") + #return named_context(f"data/{fname}") # /data prefix is not needed until we have multiple data folders to ingest from + return named_context(fname) def admin_context(): return named_context("ADMIN") -def add_graph_to_admin(context: str, gdb: SPARQLWrapper = None): +def register_context_lastmod(context: str, gdb: SPARQLWrapper = None): if URN_BASE not in context: context = named_context(context) if gdb is None: gdb = gdb_from_config() - inserts = f"INSERT DATA {{ GRAPH <{ admin_context() }> {{ <{ context }> \"{time.time()}\"^^ }} }}" + + c_time = time.time() + + #check if the context is already in the admin graph, if so check if the last modified time of the return is smaller than the current time + #if so, update the last modified time of the context + #if not, do nothing + lastmod = get_context_lastmod(context,gdb) + if lastmod['results']['bindings']: + if float(lastmod['results']['bindings'][0]['m']['value']) < c_time: + #delete the old last modified time + delete_context_from_admin(context,gdb) + + #tsparql for insert + # GRAPH <{ admin_context() }> {{ <{ context }> \"{time.time()}\"^^ }} + inserts = f"INSERT DATA {{ GRAPH <{ admin_context() }> {{ <{ context }> \"{time.time()}\"^^ }} }}" gdb.setQuery(inserts) gdb.queryType = 'INSERT' gdb.query() +def get_context_lastmod(context: str, gdb: SPARQLWrapper = None): + if URN_BASE not in context: + context = named_context(context) + if gdb is None: + gdb = gdb_from_config() + #get last modified time of context + query = f"SELECT ?m WHERE {{ GRAPH <{ admin_context() }> {{ <{ context }> ?m }} }}" + gdb.setQuery(query) + gdb.setReturnFormat(JSON) + results = gdb.query().convert() + return results + def get_admin_graph(gdb: SPARQLWrapper = None): if gdb is None: gdb = gdb_from_config() #get full admin graph - query = f"SELECT ?g ?m WHERE {{ GRAPH <{ admin_context() }> {{ ?g ?m }} }}" + query = f"SELECT ?g ?m WHERE {{ GRAPH <{ admin_context() }> {{ ?g ?m }} }}" gdb.setQuery(query) gdb.setReturnFormat(JSON) results = gdb.query().convert() return results -def delete_graph_from_admin(context: str, gdb: SPARQLWrapper = None): +def delete_context_from_admin(context: str, gdb: SPARQLWrapper = None): if URN_BASE not in context: context = named_context(context) if gdb is None: gdb = gdb_from_config() - deletes = f"DELETE WHERE {{ GRAPH <{ admin_context() }> {{ <{ context }> ?m }} }}" + deletes = f"DELETE WHERE {{ GRAPH <{ admin_context() }> {{ <{ context }> ?m }} }}" gdb.setQuery(deletes) gdb.queryType = 'DELETE' gdb.query() @@ -122,7 +147,7 @@ def delete_graph(context: str,gdb: SPARQLWrapper= None): gdb.query() #delete the graph from the admin graph - delete_graph_from_admin(context,gdb) + delete_context_from_admin(context,gdb) def suffix_2_format(suffix): if suffix in ["ttl", "turtle"]: @@ -137,15 +162,21 @@ def read_graph(fpath: Path, format: str = None): graph: Graph = Graph().parse(location=str(fpath), format=format) return graph -def ingest_data_file(fname, context: str = None, replace_context: str = None): +def delete_data_file(fname): + context = fname_2_context(fname) + log.info(f"deleting {fname} from {context}") + delete_graph(context) + +def ingest_data_file(fname, context: str = None, replace: bool = False): file_path = data_path_from_config() / fname assert file_path.exists(), f"cannot ingest file at {file_path}" graph = read_graph(file_path) - iri_context = named_context(context) - iri_replace_context = named_context(replace_context) if replace_context is not None 
else None - log.info(f"ingesting {file_path} into {iri_context} replacing {iri_replace_context}") + if context is None: + context = fname_2_context(fname) + + log.info(f"ingesting {file_path} into {context} | replace : {replace}") - ingest_graph(graph, context=iri_context, replace_context=iri_replace_context) + ingest_graph(graph, context=context, replace=replace) # TODO maintain metadata triples last-ingest / last-modified of ingested file in some admin graph context diff --git a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py index 54c760c..83a4324 100644 --- a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py +++ b/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv import logging from lwua.helpers import enable_logging, resolve_path -from lwua.graph_functions import delete_graph, ingest_data_file, get_admin_graph +from lwua.graph_functions import ingest_data_file, delete_data_file, get_admin_graph log = logging.getLogger(__name__) @@ -13,11 +13,10 @@ ### Cedric Decruw - 2023-11-16 ### # ! watchdog was not used due to the fact that the docker container was not able to access the host filesystem # ! this is a simple file watcher that will be used to trigger the ingest process -class FileStore: +class Observer: def __init__(self, directory): self.directory = directory self.files = self.get_all_files() - self.last_files = None def get_all_files(self): return {f: [os.path.getmtime(f)] for f in Path(self.directory).glob('**/*') if f.is_file()} @@ -28,128 +27,77 @@ def update(self): deleted = {f: self.files[f] for f in self.files if f not in new_files} modified = {} for f in new_files: - if f in self.files and new_files[f][0] != self.files[f][-1]: - if len(self.files[f]) == 2: - self.files[f].pop(0) - self.files[f].append(new_files[f][0]) + if f in self.files and new_files[f] != self.files[f]: + self.files[f] = new_files[f] modified[f] = self.files[f] - self.last_files = self.files + self.files = new_files - return added, deleted, modified, self.last_files + return added, deleted, modified class Watcher: def __init__(self, directory_to_watch): self.directory_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", os.path.join("/root/graphdb-import",directory_to_watch)), "dotenv").absolute() log.info(f"env pointing to { self.directory_to_watch }") - self.file_store = FileStore(self.directory_to_watch) + self.observer = Observer(self.directory_to_watch) + self.first_loop = True def run(self): try: while True: - if self.file_store.last_files is None: + if self.first_loop: log.info("First time loop") - #try and get the graph , if this fails due to the graphdb server not being up yet, the watcher will try again in 5 seconds untill it succeeds get_admin = True while get_admin: try: + #get admin graph admin_graph = get_admin_graph() - log.info(f"admin graph: {admin_graph}") - # admin graph looks like {'head': {'vars': ['g', 'm']}, 'results': {'bindings': [{'g': {'type': 'uri', 'value': 'urn:lwua:INGEST#admin-lwua-ingest'}, 'm': {'datatype': 'http://www.w3.org/2001/XMLSchema#dateTime', 'type': 'literal', 'value': '2021-09-16T14:00:00Z'}}}]}} - # we need to extract all the g.value's and put them in a list - all_graphs_unformatted = [g['g']['value'] for g in admin_graph['results']['bindings']] - #replace the URN_BASE with the empty string to get the context - all_graphs = [g.replace(f"{URN_BASE}:","") for g in all_graphs_unformatted] - - #from this extract the name and last modified time of the graph by splitting on 
the last "_" - info_admin_graph = [] - for g in all_graphs: - info_g = {} - #split the string on the last "_" to get the name and last modified time - name, last_modified = g.rsplit("_",1) - info_g['name'] = name - info_g['last_modified'] = last_modified - info_admin_graph.append(info_g) - - log.info(f"all graphs: {info_admin_graph}") - get_admin = False - - #get all the files in the data folder recursively - all_files = self.file_store.get_all_files() - #compare the files to the admin graph + info_admin = [(g['g']['value'].replace(f"{URN_BASE}:", ''), g['m']['value']) for g in admin_graph['results']['bindings']] + all_files = self.observer.get_all_files() + #compare the files to the admin graph # if filename is not in the admin graph, add it # if filename is in the admin graph, check if the last modified time is the same, if not update the graph # if filename is in the admin graph, but not in the files, delete the graph - + info_admin_dict = {g[0]: g[1] for g in info_admin} for f in all_files: - #check if the file is in the admin graph - if f not in all_graphs: - #add the file to the graph - context = f"{f}_{all_files[f][0]}" - log.info(f"context to be used to add: {context}") - ingest_data_file(f,context) - else: - #check if the last modified time is the same - if all_files[f][0] != info_admin_graph[all_graphs.index(f)]['last_modified']: - #update the graph - context_to_delete = f"{f}_{info_admin_graph[all_graphs.index(f)]['last_modified']}" - context_to_add = f"{f}_{all_files[f][0]}" - log.info(f"context to be used to delete: {context_to_delete}") - log.info(f"context to be used to add: {context_to_add}") - ingest_data_file(f,context_to_add, context_to_delete) - - #check if there are any graphs in the admin graph that are not in the files - for g in info_admin_graph: - if g['name'] not in all_files: - log.info(f"graph {g['name']} not in files, deleting") - #delete the graph - context = f"{g['name']}_{g['last_modified']}" - log.info(f"context to be used to delete: {context}") - delete_graph(context) + if f in info_admin_dict: + # !TODO: check variables since the modified now just deletes the graph :/ + if info_admin_dict[f] < all_files[f][0]: + log.info(f"File {f} has been modified since downtime, updating graph") + ingest_data_file(f, None, True) + continue + log.info(f"File {f} has been added since downtime, adding graph") + ingest_data_file(f) + + for g in info_admin: + if g[0] not in all_files: + log.info(f"File {g[0]} has been deleted since downtime, deleting graph") + delete_data_file(g[0]) - except: - log.info("graphdb server not up yet, trying again in 1 second") + except Exception as e: + log.error(f"error: {e}") time.sleep(1) continue + + get_admin = False + self.first_loop = False log.info("Checking for updates") - added, deleted, modified, previous_filestore = self.file_store.update() + added, deleted, modified = self.observer.update() for f in added: - log.info(f"File {f} has been added at {time.ctime(added[f][0])}") - #context = f"{f}_{format_time(added[f][0])}" - context = f"{f}_{added[f][0]}" - log.info(f"context to be used to add: {context}") - ingest_data_file(f,context) + log.info(f"File {f} has been added ") + ingest_data_file(f) for f in deleted: log.info(f"File {f} has been deleted") - #find the file in the previous filestore and use the last modified time to delete the graph - #context = f"{f}_{format_time(previous_filestore[f][-1])}" - context = f"{f}_{previous_filestore[f][-1]}" - log.info(f"context to be used to delete: {context}") - delete_graph(context) + 
delete_data_file(f) for f in modified: - log.info(f"File {f} has been modified at {time.ctime(modified[f][-1])}, previous to last modified time: {time.ctime(modified[f][0]) if len(modified[f]) > 1 else 'N/A'}") - #context_to_delete = f"{f}_{format_time(modified[f][0])}" - #context_to_add = f"{f}_{format_time(modified[f][-1])}" - context_to_delete = f"{f}_{modified[f][0]}" - context_to_add = f"{f}_{modified[f][-1]}" - log.info(f"context to be used to delete: {context_to_delete}") - log.info(f"context to be used to add: {context_to_add}") - ingest_data_file(f,context_to_add, context_to_delete) + log.info(f"File {f} has been modified") + ingest_data_file(f,None, True) time.sleep(5) except KeyboardInterrupt: log.info("Stopping watcher") -#left out this part of the code since it is easier to just use the epoch time as the context , that way there is no need to reverse the process for the startup of the watcher -''' -def format_time(epoch_time): - # Convert epoch time to struct_time object - time_obj = time.localtime(epoch_time) - # Format to return %Y_%m_%d_%H_%M_%S - return time.strftime("%Y_%m_%d_%H_%M_%S", time_obj) -''' - ## test the watcher on local file system - not in docker if __name__ == '__main__': load_dotenv() diff --git a/docker/lwua-ingest/lwua-py/poetry.lock b/docker/lwua-ingest/lwua-py/poetry.lock index 82c8400..4f1e3c3 100644 --- a/docker/lwua-ingest/lwua-py/poetry.lock +++ b/docker/lwua-ingest/lwua-py/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "apscheduler" version = "3.10.4" description = "In-process task scheduler with Cron-like capabilities" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -15,7 +14,7 @@ files = [ [package.dependencies] pytz = "*" six = ">=1.4.0" -tzlocal = ">=2.0,<3.0.0 || >=4.0.0" +tzlocal = ">=2.0,<3.dev0 || >=4.dev0" [package.extras] doc = ["sphinx", "sphinx-rtd-theme"] @@ -33,7 +32,6 @@ zookeeper = ["kazoo"] name = "isodate" version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" -category = "main" optional = false python-versions = "*" files = [ @@ -48,7 +46,6 @@ six = "*" name = "pyaml" version = "23.9.3" description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -66,7 +63,6 @@ anchors = ["unidecode"] name = "pyparsing" version = "3.1.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" optional = false python-versions = ">=3.6.8" files = [ @@ -81,7 +77,6 @@ diagrams = ["jinja2", "railroad-diagrams"] name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -96,7 +91,6 @@ cli = ["click (>=5.0)"] name = "pytz" version = "2023.3.post1" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -108,7 +102,6 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -168,7 +161,6 @@ files = [ name = "rdflib" version = "7.0.0" description = "RDFLib is a Python library for working with RDF, a simple yet 
powerful language for representing information." -category = "main" optional = false python-versions = ">=3.8.1,<4.0.0" files = [ @@ -190,7 +182,6 @@ networkx = ["networkx (>=2.0.0,<3.0.0)"] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -202,7 +193,6 @@ files = [ name = "sparqlwrapper" version = "2.0.0" description = "SPARQL Endpoint interface to Python" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -223,7 +213,6 @@ pandas = ["pandas (>=1.3.5)"] name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" -category = "main" optional = false python-versions = ">=2" files = [ @@ -235,7 +224,6 @@ files = [ name = "tzlocal" version = "5.0.1" description = "tzinfo object for the local timezone" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -249,47 +237,7 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] -[[package]] -name = "watchdog" -version = "3.0.0" -description = "Filesystem events monitoring" -category = "main" -optional = false -python-versions = ">=3.7" -files = [ - {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:336adfc6f5cc4e037d52db31194f7581ff744b67382eb6021c868322e32eef41"}, - {file = "watchdog-3.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a70a8dcde91be523c35b2bf96196edc5730edb347e374c7de7cd20c43ed95397"}, - {file = "watchdog-3.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:adfdeab2da79ea2f76f87eb42a3ab1966a5313e5a69a0213a3cc06ef692b0e96"}, - {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2b57a1e730af3156d13b7fdddfc23dea6487fceca29fc75c5a868beed29177ae"}, - {file = "watchdog-3.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7ade88d0d778b1b222adebcc0927428f883db07017618a5e684fd03b83342bd9"}, - {file = "watchdog-3.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7e447d172af52ad204d19982739aa2346245cc5ba6f579d16dac4bfec226d2e7"}, - {file = "watchdog-3.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9fac43a7466eb73e64a9940ac9ed6369baa39b3bf221ae23493a9ec4d0022674"}, - {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8ae9cda41fa114e28faf86cb137d751a17ffd0316d1c34ccf2235e8a84365c7f"}, - {file = "watchdog-3.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:25f70b4aa53bd743729c7475d7ec41093a580528b100e9a8c5b5efe8899592fc"}, - {file = "watchdog-3.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4f94069eb16657d2c6faada4624c39464f65c05606af50bb7902e036e3219be3"}, - {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7c5f84b5194c24dd573fa6472685b2a27cc5a17fe5f7b6fd40345378ca6812e3"}, - {file = "watchdog-3.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3aa7f6a12e831ddfe78cdd4f8996af9cf334fd6346531b16cec61c3b3c0d8da0"}, - {file = "watchdog-3.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:233b5817932685d39a7896b1090353fc8efc1ef99c9c054e46c8002561252fb8"}, - {file = "watchdog-3.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:13bbbb462ee42ec3c5723e1205be8ced776f05b100e4737518c67c8325cf6100"}, - {file = "watchdog-3.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:8f3ceecd20d71067c7fd4c9e832d4e22584318983cabc013dbf3f70ea95de346"}, - {file = "watchdog-3.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c9d8c8ec7efb887333cf71e328e39cffbf771d8f8f95d308ea4125bf5f90ba64"}, - {file = "watchdog-3.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0e06ab8858a76e1219e68c7573dfeba9dd1c0219476c5a44d5333b01d7e1743a"}, - {file = "watchdog-3.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:d00e6be486affb5781468457b21a6cbe848c33ef43f9ea4a73b4882e5f188a44"}, - {file = "watchdog-3.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:c07253088265c363d1ddf4b3cdb808d59a0468ecd017770ed716991620b8f77a"}, - {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:5113334cf8cf0ac8cd45e1f8309a603291b614191c9add34d33075727a967709"}, - {file = "watchdog-3.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:51f90f73b4697bac9c9a78394c3acbbd331ccd3655c11be1a15ae6fe289a8c83"}, - {file = "watchdog-3.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:ba07e92756c97e3aca0912b5cbc4e5ad802f4557212788e72a72a47ff376950d"}, - {file = "watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d429c2430c93b7903914e4db9a966c7f2b068dd2ebdd2fa9b9ce094c7d459f33"}, - {file = "watchdog-3.0.0-py3-none-win32.whl", hash = "sha256:3ed7c71a9dccfe838c2f0b6314ed0d9b22e77d268c67e015450a29036a81f60f"}, - {file = "watchdog-3.0.0-py3-none-win_amd64.whl", hash = "sha256:4c9956d27be0bb08fc5f30d9d0179a855436e655f046d288e2bcc11adfae893c"}, - {file = "watchdog-3.0.0-py3-none-win_ia64.whl", hash = "sha256:5d9f3a10e02d7371cd929b5d8f11e87d4bad890212ed3901f9b4d68767bee759"}, - {file = "watchdog-3.0.0.tar.gz", hash = "sha256:4d98a320595da7a7c5a18fc48cb633c2e73cda78f93cac2ef42d42bf609a33f9"}, -] - -[package.extras] -watchmedo = ["PyYAML (>=3.10)"] - [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c8bffe50f97eac9eb940805add5c23e9711e708ef880fdf63d9fc3dfb58e4ad4" +content-hash = "81fd3c5daa1d551b619488776b7d186ccd4e0d9a0638ecf2dc65efd7c129fc45" diff --git a/docker/lwua-ingest/lwua-py/pyproject.toml b/docker/lwua-ingest/lwua-py/pyproject.toml index 4fa77c9..9fa1089 100644 --- a/docker/lwua-ingest/lwua-py/pyproject.toml +++ b/docker/lwua-ingest/lwua-py/pyproject.toml @@ -2,7 +2,7 @@ name = "lwua" version = "0.0.0" description = "lifewatch user analysis" -authors = ["Laurian Van Maldeghem "] +authors = ["Laurian Van Maldeghem ", "Cedric Decruw ", "Marc Portier "] license = "CC0" [tool.poetry.dependencies] @@ -11,7 +11,6 @@ apscheduler = "^3.10.4" pyaml = "^23.9.3" python-dotenv = "^1.0.0" sparqlwrapper = "^2.0.0" -watchdog = "^3.0.0" [build-system] From 866c1b83c6bf428e870f689056f5a59b012e86b9 Mon Sep 17 00:00:00 2001 From: cedricd Date: Fri, 17 Nov 2023 13:16:04 +0100 Subject: [PATCH 30/60] added rdf2j and refactoring of the graph functions watcher now has observer added templates so no sparql is written in graph_functions.py refactoring graph functions.py --- data/project.ttl | 3 - .../lwua-py/lwua/graph_functions.py | 225 +++++++++++------- docker/lwua-ingest/lwua-py/lwua/ingest.py | 2 +- .../lwua-py/lwua/templates/delete_data.sparql | 13 + .../lwua-py/lwua/templates/get_admin.sparql | 9 + .../lwua-py/lwua/templates/insert_data.sparql | 12 + .../lwua-py/lwua/templates/update.sparql | 30 +++ .../lwua/{tryout_watch.py => watcher.py} | 47 ++-- docker/lwua-ingest/lwua-py/poetry.lock | 129 +++++++++- docker/lwua-ingest/lwua-py/pyproject.toml | 1 + 10 files changed, 349 insertions(+), 122 deletions(-) create mode 100644 
docker/lwua-ingest/lwua-py/lwua/templates/delete_data.sparql create mode 100644 docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql create mode 100644 docker/lwua-ingest/lwua-py/lwua/templates/insert_data.sparql create mode 100644 docker/lwua-ingest/lwua-py/lwua/templates/update.sparql rename docker/lwua-ingest/lwua-py/lwua/{tryout_watch.py => watcher.py} (84%) diff --git a/data/project.ttl b/data/project.ttl index 90aca3d..bf11489 100644 --- a/data/project.ttl +++ b/data/project.ttl @@ -15,9 +15,6 @@ a schema:Person ; schema:name "Laurian van Maldghem"^^xsd:string . - a schema:Person ; - schema:name "AlainProvist"^^xsd:string . - a schema:Person ; schema:name "Marc Portier"^^xsd:string . diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py index 912dcf7..b00a2bb 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py +++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py @@ -9,9 +9,10 @@ import os from dotenv import load_dotenv from .helpers import enable_logging, resolve_path +from pyrdfj2 import Filters, J2RDFSyntaxBuilder log = logging.getLogger(__name__) -URN_BASE = "urn:lwua:INGEST" +URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") def data_path_from_config(): local_default = str(resolve_path("./data", versus="dotenv")) @@ -36,32 +37,129 @@ def gdb_from_config(): gdb.method = 'POST' return gdb +def get_j2rdf_builder(): + template_folder = resolve_path("./lwua/templates", versus="dotenv") + #log.info(f"template_folder == {template_folder}") + #init J2RDFSyntaxBuilder + j2rdf = J2RDFSyntaxBuilder(templates_folder=template_folder) + return j2rdf + +def update_context_admin(context: str, gdb: SPARQLWrapper = None, lastmod: str= None): + """Update the last modified time of a context in the admin graph + + Args: + context (str): The context to update the last modified time of + gdb (SPARQLWrapper, optional): the SPARQLWrapper to post to. Defaults to None. + lastmod (str, optional): epoch time. Defaults to None. 
+ """ + log.info(f"update_context_admin on {context}") + j2rdf = get_j2rdf_builder() + + # check if context is IRI compliant + context = check_iri_compliance(context) + + #variables for the template + template = "update.sparql" + vars = { + "admin_graph": admin_context(), + "context": context, + "lastmod": lastmod + } + #get the sparql query + query = j2rdf.build_syntax(template, **vars) + #log.debug(f"update_context_admin query == {query}") + #execute the query + gdb.setQuery(query) + gdb.query() + +def check_iri_compliance(context: str): + if URN_BASE not in context: + context = named_context(context) + return context + + +def insert_data(graph: Graph, context: str = None, gdb: SPARQLWrapper = None): + """Insert data into a context in the graph database + + Args: + graph (Graph): The graph to insert data from + context (str): The context to insert data into + gdb (SPARQLWrapper): The SPARQLWrapper to post to + """ + + log.info(f"insert_data into {context}") + + # Get the SPARQLWrapper + gdb = gdb_from_config() if gdb is None else gdb + + # Initialize the J2RDFSyntaxBuilder + j2rdf = get_j2rdf_builder() + + #check if context is IRI compliant if context is not None + context = check_iri_compliance(context) if context is not None else None + + # Variables for the template + template = "insert_data.sparql" + ntstr = graph.serialize(format="nt") + vars = { + "context": context, + "data": ntstr + } + + # Get the SPARQL query + query = j2rdf.build_syntax(template, **vars) + #log.debug(f"insert_data query == {query}") + + # Execute the query + gdb.setQuery(query) + gdb.query() + +def delete_data(context: str = None, gdb: SPARQLWrapper = None): + """Delete data from a context in the graph database + + Args: + context (str): The context to delete data from (if None, delete all data) + gdb (SPARQLWrapper): The SPARQLWrapper to post to + """ + + log.info(f"delete_data on {context}") + + # Get the SPARQLWrapper + gdb = gdb_from_config() if gdb is None else gdb + + # Initialize the J2RDFSyntaxBuilder + j2rdf = get_j2rdf_builder() + + #check if context is IRI compliant + context = check_iri_compliance(context) if context is not None else None + + # Variables for the template + template = "delete_data.sparql" + vars = { + "context": context + } + + # Get the SPARQL query + query = j2rdf.build_syntax(template, **vars) + + # Execute the query + gdb.setQuery(query) + gdb.query() + def ingest_graph(graph: Graph, context: str = None, replace: bool = False): log.debug(f"to insert data into <{ context }>") - gdb = gdb_from_config() - # do the cleanup if possible if replace and context is not None: log.debug(f"deleting <{ context }> before insert") - delete_graph(context, gdb) - - # extract the triples and format the insert statement - ntstr = graph.serialize(format="nt") - #log.debug(f"extracted tiples == { ntstr }") # comented out because it is too verbose - if context is not None: - inserts = f"INSERT DATA {{ GRAPH <{ context }> {{ { ntstr } }} }}" - else: - inserts = f"INSERT DATA {{ { ntstr} }}" - #log.debug(f"INSERT of ==> { inserts }") - - gdb.setQuery(inserts) - log.debug(f"detected querytype == {gdb.queryType}") - - # unsure if this is needed -- can sqlwrapper detect this automatically? 
- gdb.queryType = 'INSERT' # important to indicate that the update Endpoint should be used - gdb.query() - register_context_lastmod(context,gdb) + delete_data(context, gdb) + + # insert the data + insert_data(graph, context, gdb) + + #get the time + c_time = time.time() + update_context_admin(context, gdb, c_time) def named_context(name: str, base: str = URN_BASE): return f"{base}:{name}" # TODO maybe consider something else? @@ -73,81 +171,33 @@ def fname_2_context(fname: str): def admin_context(): return named_context("ADMIN") -def register_context_lastmod(context: str, gdb: SPARQLWrapper = None): - if URN_BASE not in context: - context = named_context(context) - if gdb is None: - gdb = gdb_from_config() - - c_time = time.time() - - #check if the context is already in the admin graph, if so check if the last modified time of the return is smaller than the current time - #if so, update the last modified time of the context - #if not, do nothing - lastmod = get_context_lastmod(context,gdb) - if lastmod['results']['bindings']: - if float(lastmod['results']['bindings'][0]['m']['value']) < c_time: - #delete the old last modified time - delete_context_from_admin(context,gdb) - - #tsparql for insert - # GRAPH <{ admin_context() }> {{ <{ context }> \"{time.time()}\"^^ }} - inserts = f"INSERT DATA {{ GRAPH <{ admin_context() }> {{ <{ context }> \"{time.time()}\"^^ }} }}" - gdb.setQuery(inserts) - gdb.queryType = 'INSERT' - gdb.query() - -def get_context_lastmod(context: str, gdb: SPARQLWrapper = None): - if URN_BASE not in context: - context = named_context(context) - if gdb is None: - gdb = gdb_from_config() - #get last modified time of context - query = f"SELECT ?m WHERE {{ GRAPH <{ admin_context() }> {{ <{ context }> ?m }} }}" - gdb.setQuery(query) - gdb.setReturnFormat(JSON) - results = gdb.query().convert() - return results - def get_admin_graph(gdb: SPARQLWrapper = None): + + log.info(f"get_admin_graph") + if gdb is None: gdb = gdb_from_config() - #get full admin graph - query = f"SELECT ?g ?m WHERE {{ GRAPH <{ admin_context() }> {{ ?g ?m }} }}" + + j2rdf = get_j2rdf_builder() + template = "get_admin.sparql" + vars = { + "admin_context": admin_context() + } + query = j2rdf.build_syntax(template, **vars) + #log.debug(f"get_admin_graph query == {query}") gdb.setQuery(query) gdb.setReturnFormat(JSON) results = gdb.query().convert() return results - -def delete_context_from_admin(context: str, gdb: SPARQLWrapper = None): - if URN_BASE not in context: - context = named_context(context) - if gdb is None: - gdb = gdb_from_config() - deletes = f"DELETE WHERE {{ GRAPH <{ admin_context() }> {{ <{ context }> ?m }} }}" - gdb.setQuery(deletes) - gdb.queryType = 'DELETE' - gdb.query() - def delete_all_graphs(gdb): - deletes = f"DELETE WHERE {{ GRAPH ?g {{ ?s ?p ?o }} }}" - gdb.setQuery(deletes) - gdb.queryType = 'DELETE' - gdb.query() + delete_data(None, gdb) def delete_graph(context: str,gdb: SPARQLWrapper= None): - if URN_BASE not in context: - context = named_context(context) if gdb is None: gdb = gdb_from_config() - deletes = f"DELETE WHERE {{ GRAPH <{ context }> {{ ?s ?p ?o }} }}" - gdb.setQuery(deletes) - gdb.queryType = 'DELETE' - gdb.query() - - #delete the graph from the admin graph - delete_context_from_admin(context,gdb) + delete_data(context, gdb) + update_context_admin(context, gdb, None) def suffix_2_format(suffix): if suffix in ["ttl", "turtle"]: @@ -167,16 +217,11 @@ def delete_data_file(fname): log.info(f"deleting {fname} from {context}") delete_graph(context) -def 
ingest_data_file(fname, context: str = None, replace: bool = False): +def ingest_data_file(fname, replace: bool = False): file_path = data_path_from_config() / fname assert file_path.exists(), f"cannot ingest file at {file_path}" - graph = read_graph(file_path) - - if context is None: - context = fname_2_context(fname) - + context = fname_2_context(fname) log.info(f"ingesting {file_path} into {context} | replace : {replace}") - ingest_graph(graph, context=context, replace=replace) # TODO maintain metadata triples last-ingest / last-modified of ingested file in some admin graph context diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 3c7ae2d..21bef6f 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -5,7 +5,7 @@ import os from dotenv import load_dotenv from .helpers import enable_logging, resolve_path -from .tryout_watch import Watcher +from .watcher import Watcher from .graph_functions import ingest_data_file, data_path_from_config diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/delete_data.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/delete_data.sparql new file mode 100644 index 0000000..c0afe36 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/templates/delete_data.sparql @@ -0,0 +1,13 @@ +{# + This template is used to generate SPARQL DELETE WHERE queries. + This template takes 1 parameter: + - context: the context from which the data is to be deleted. If none is given, all data is deleted. +#} + +DELETE WHERE { + {% if context %} + GRAPH <{{ context }}> { ?s ?p ?o } + {% else %} + ?s ?p ?o + {% endif %} +} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql new file mode 100644 index 0000000..7cb2cd9 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql @@ -0,0 +1,9 @@ +{# + This template is used to generate SPARQL SELECT queries. + This template takes 1 parameter: + - admin_context: the context from which the data is to be selected +#} + +SELECT ?g ?m WHERE { + GRAPH <{{ admin_context }}> { ?g ?m } +} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/insert_data.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/insert_data.sparql new file mode 100644 index 0000000..2749b8a --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/templates/insert_data.sparql @@ -0,0 +1,12 @@ +{# + This template is used to generate SPARQL INSERT DATA queries. + The template takes two parameters: + - data: the data to be inserted + - context: the context in which the data should be inserted +#} + +{% if context %} +INSERT DATA { GRAPH <{{ context }}> { {{ data }} } } +{% else %} +INSERT DATA { {{ data }} } +{% endif %} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/update.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/update.sparql new file mode 100644 index 0000000..036665b --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/templates/update.sparql @@ -0,0 +1,30 @@ +{# + This template generates a SPARQL query to update a context in an admin graph. + + Variables: + - admin_graph: The IRI of the admin graph. + - context: The IRI of the context to update. + - lastmod: The new modification date. If provided, the context will be inserted again with this date. + +#} + +PREFIX schema: +DELETE { + GRAPH <{{ admin_graph }}> { + <{{ context }}> schema:dateModified ?date . 
+ } +} +INSERT { + {% if lastmod %} + GRAPH <{{ admin_graph }}> { + <{{ context }}> schema:dateModified "{{ lastmod }}"^^xsd:dateTime . + } + {% endif %} +} +WHERE { + OPTIONAL { + GRAPH <{{ admin_graph }}> { + <{{ context }}> schema:dateModified ?date . + } + } +} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py similarity index 84% rename from docker/lwua-ingest/lwua-py/lwua/tryout_watch.py rename to docker/lwua-ingest/lwua-py/lwua/watcher.py index 83a4324..3235ff8 100644 --- a/docker/lwua-ingest/lwua-py/lwua/tryout_watch.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -8,20 +8,19 @@ log = logging.getLogger(__name__) -URN_BASE = "urn:lwua:INGEST" # !TODO: shouldn't this be in the env file? +URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") -### Cedric Decruw - 2023-11-16 ### -# ! watchdog was not used due to the fact that the docker container was not able to access the host filesystem -# ! this is a simple file watcher that will be used to trigger the ingest process -class Observer: - def __init__(self, directory): - self.directory = directory +class Watcher: + def __init__(self, directory_to_watch): + self.directory_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", os.path.join("/root/graphdb-import",directory_to_watch)), "dotenv").absolute() + log.info(f"env pointing to { self.directory_to_watch }") self.files = self.get_all_files() - + self.first_loop = True + def get_all_files(self): - return {f: [os.path.getmtime(f)] for f in Path(self.directory).glob('**/*') if f.is_file()} + return {f: [os.path.getmtime(f)] for f in Path(self.directory_to_watch).glob('**/*') if f.is_file()} - def update(self): + def observe(self): new_files = self.get_all_files() added = {f: new_files[f] for f in new_files if f not in self.files} deleted = {f: self.files[f] for f in self.files if f not in new_files} @@ -34,13 +33,6 @@ def update(self): self.files = new_files return added, deleted, modified -class Watcher: - def __init__(self, directory_to_watch): - self.directory_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", os.path.join("/root/graphdb-import",directory_to_watch)), "dotenv").absolute() - log.info(f"env pointing to { self.directory_to_watch }") - self.observer = Observer(self.directory_to_watch) - self.first_loop = True - def run(self): try: while True: @@ -53,26 +45,27 @@ def run(self): #get admin graph admin_graph = get_admin_graph() info_admin = [(g['g']['value'].replace(f"{URN_BASE}:", ''), g['m']['value']) for g in admin_graph['results']['bindings']] - all_files = self.observer.get_all_files() + all_files = self.get_all_files() #compare the files to the admin graph # if filename is not in the admin graph, add it # if filename is in the admin graph, check if the last modified time is the same, if not update the graph # if filename is in the admin graph, but not in the files, delete the graph info_admin_dict = {g[0]: g[1] for g in info_admin} + + for g in info_admin: + if g[0] not in all_files: + log.info(f"File {g[0]} has been deleted since downtime, deleting graph") + delete_data_file(g[0]) + for f in all_files: if f in info_admin_dict: # !TODO: check variables since the modified now just deletes the graph :/ if info_admin_dict[f] < all_files[f][0]: log.info(f"File {f} has been modified since downtime, updating graph") - ingest_data_file(f, None, True) + ingest_data_file(f, True) continue log.info(f"File {f} has been added since downtime, adding graph") ingest_data_file(f) - - for g in 
info_admin: - if g[0] not in all_files: - log.info(f"File {g[0]} has been deleted since downtime, deleting graph") - delete_data_file(g[0]) except Exception as e: log.error(f"error: {e}") @@ -83,7 +76,7 @@ def run(self): self.first_loop = False log.info("Checking for updates") - added, deleted, modified = self.observer.update() + added, deleted, modified = self.observe() for f in added: log.info(f"File {f} has been added ") ingest_data_file(f) @@ -92,7 +85,7 @@ def run(self): delete_data_file(f) for f in modified: log.info(f"File {f} has been modified") - ingest_data_file(f,None, True) + ingest_data_file(f, True) time.sleep(5) except KeyboardInterrupt: @@ -105,4 +98,4 @@ def run(self): file_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", "/root/graphdb-import/data"), "dotenv").absolute() log.info(f"env pointing to { file_to_watch }") w = Watcher(file_to_watch) - w.run() + w.run() \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/poetry.lock b/docker/lwua-ingest/lwua-py/poetry.lock index 4f1e3c3..658b8b1 100644 --- a/docker/lwua-ingest/lwua-py/poetry.lock +++ b/docker/lwua-ingest/lwua-py/poetry.lock @@ -42,6 +42,92 @@ files = [ [package.dependencies] six = "*" +[[package]] +name = "jinja2" +version = "3.1.2" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "markupsafe" +version = "2.1.3" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = 
"MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = 
"MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, +] + [[package]] name = "pyaml" version = "23.9.3" @@ -73,6 +159,36 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pyrdfj2" +version = "0.0.5" +description = "Python wrapper on jinja SPARQL templating" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "pyrdfj2-0.0.5-py3-none-any.whl", hash = "sha256:fa8dabb66668345d6da64a58e9cead75b02090abd3727a9577140db10b00d6ce"}, + {file = "pyrdfj2-0.0.5.tar.gz", hash = "sha256:6d840015f7d493313c7fa432a5bef924da5cb9c5d76c39bd237caa36fcc1476a"}, +] + +[package.dependencies] +jinja2 = "*" +python-dateutil = "*" +uritemplate = "*" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = 
"python-dotenv" version = "1.0.0" @@ -237,7 +353,18 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +optional = false +python-versions = ">=3.6" +files = [ + {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, +] + [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "81fd3c5daa1d551b619488776b7d186ccd4e0d9a0638ecf2dc65efd7c129fc45" +content-hash = "9941a602fa033117b70c1af850ec4604fc06ab9c24ef8c9cef23108565d6c1df" diff --git a/docker/lwua-ingest/lwua-py/pyproject.toml b/docker/lwua-ingest/lwua-py/pyproject.toml index 9fa1089..3039ff0 100644 --- a/docker/lwua-ingest/lwua-py/pyproject.toml +++ b/docker/lwua-ingest/lwua-py/pyproject.toml @@ -11,6 +11,7 @@ apscheduler = "^3.10.4" pyaml = "^23.9.3" python-dotenv = "^1.0.0" sparqlwrapper = "^2.0.0" +pyrdfj2 = "^0.0.5" [build-system] From 36d73e44747f9eefe159625456a71220de59e70b Mon Sep 17 00:00:00 2001 From: cedricd Date: Fri, 17 Nov 2023 17:50:59 +0100 Subject: [PATCH 31/60] Update graph_functions.py --- docker/lwua-ingest/lwua-py/lwua/graph_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py index b00a2bb..c414b81 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py +++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py @@ -9,7 +9,7 @@ import os from dotenv import load_dotenv from .helpers import enable_logging, resolve_path -from pyrdfj2 import Filters, J2RDFSyntaxBuilder +from pyrdfj2 import J2RDFSyntaxBuilder log = logging.getLogger(__name__) URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") From 8fa637b6d9287eb80f2620c7be75024a055312c7 Mon Sep 17 00:00:00 2001 From: cedricd Date: Fri, 17 Nov 2023 23:16:40 +0100 Subject: [PATCH 32/60] deleted / commented out non used imports --- docker/lwua-ingest/lwua-py/lwua/__main__.py | 2 -- .../lwua-ingest/lwua-py/lwua/graph_functions.py | 4 ++-- docker/lwua-ingest/lwua-py/lwua/ingest.py | 16 +--------------- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/__main__.py b/docker/lwua-ingest/lwua-py/lwua/__main__.py index c7ab2d8..2cfe663 100644 --- a/docker/lwua-ingest/lwua-py/lwua/__main__.py +++ b/docker/lwua-ingest/lwua-py/lwua/__main__.py @@ -9,10 +9,8 @@ import sys import logging - log = logging.getLogger(__name__) - class IngestDaemon(Daemon): def run(self): try: diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py index c414b81..fef4e03 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py +++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py @@ -7,8 +7,8 @@ from rdflib import Graph import time import os -from dotenv import load_dotenv -from .helpers import enable_logging, resolve_path +#from dotenv import load_dotenv +from .helpers import resolve_path #,enable_logging from pyrdfj2 import J2RDFSyntaxBuilder log = logging.getLogger(__name__) diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py 
index 21bef6f..73ac954 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -1,10 +1,6 @@ -from SPARQLWrapper import SPARQLWrapper, JSON -from pathlib import Path import logging -from rdflib import Graph -import os from dotenv import load_dotenv -from .helpers import enable_logging, resolve_path +from .helpers import enable_logging #, resolve_path from .watcher import Watcher from .graph_functions import ingest_data_file, data_path_from_config @@ -19,16 +15,6 @@ def run_ingest(): #init watcher on data_path w = Watcher(data_path) w.run() - - # TODO -- immplement steps - # list all the contents (files) in data_path together with last mod - # get the <#admin-luwa-ingest> graph listing the maintained named-graphs and their lastmod - # there nees to be a mapping between filenames and named-graphs ! - # check which filenames are younger then their named-graph equivalent - # read them into mem - replace the coresponding named-graph in the repo - # update the triple for the named-graph to lastmod in the admin grap - - # Note: this main method allows to locally test outside docker # directly connecting to a localhost graphdb endpoint (which might be inside docker itself) From ee7434523465583d614b11d11ff4a75192eb187b Mon Sep 17 00:00:00 2001 From: cedricd Date: Sat, 18 Nov 2023 15:28:55 +0100 Subject: [PATCH 33/60] performed autopep8 and black on all python files --- docker/lwua-ingest/Dockerfile | 4 +- docker/lwua-ingest/lwua-py/lwua/__main__.py | 1 + docker/lwua-ingest/lwua-py/lwua/daemon.py | 45 +++---- .../lwua-py/lwua/graph_functions.py | 127 ++++++++++-------- docker/lwua-ingest/lwua-py/lwua/helpers.py | 21 +-- docker/lwua-ingest/lwua-py/lwua/ingest.py | 13 +- docker/lwua-ingest/lwua-py/lwua/schedule.py | 3 +- docker/lwua-ingest/lwua-py/lwua/watcher.py | 81 +++++++---- 8 files changed, 178 insertions(+), 117 deletions(-) diff --git a/docker/lwua-ingest/Dockerfile b/docker/lwua-ingest/Dockerfile index 1a59c3d..899b04f 100644 --- a/docker/lwua-ingest/Dockerfile +++ b/docker/lwua-ingest/Dockerfile @@ -34,5 +34,5 @@ ENV PATH="/lwua-py/.venv/bin:$PATH" # ensure we have the build folder from the poetry stage of this image COPY --from=poetry /lwua-py /lwua-py -RUN rm /lwua-py/logging && ln -s /logging /lwua-py/logging -ENTRYPOINT ["python", "-m", "lwua"] +RUN rm -f /lwua-py/logging && ln -s /logging /lwua-py/logging +ENTRYPOINT ["python", "-m", "lwua"] \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/__main__.py b/docker/lwua-ingest/lwua-py/lwua/__main__.py index 2cfe663..ee3dbf9 100644 --- a/docker/lwua-ingest/lwua-py/lwua/__main__.py +++ b/docker/lwua-ingest/lwua-py/lwua/__main__.py @@ -11,6 +11,7 @@ log = logging.getLogger(__name__) + class IngestDaemon(Daemon): def run(self): try: diff --git a/docker/lwua-ingest/lwua-py/lwua/daemon.py b/docker/lwua-ingest/lwua-py/lwua/daemon.py index 157cd10..8ea9d46 100644 --- a/docker/lwua-ingest/lwua-py/lwua/daemon.py +++ b/docker/lwua-ingest/lwua-py/lwua/daemon.py @@ -23,8 +23,7 @@ def __init__(self, pidfile): self.pidfile = str(pidfile) def daemonize(self): - """ Deamonize class. UNIX double fork mechanism. - """ + """Deamonize class. 
UNIX double fork mechanism.""" try: pid = os.fork() @@ -32,11 +31,11 @@ def daemonize(self): # exit first parent sys.exit(0) except OSError as err: - log.exception(f'fork #1 failed: {err}') + log.exception(f"fork #1 failed: {err}") sys.exit(1) # decouple from parent environment - os.chdir('/') + os.chdir("/") os.setsid() os.umask(0) @@ -44,19 +43,18 @@ def daemonize(self): try: pid = os.fork() if pid > 0: - # exit from second parent sys.exit(0) except OSError as err: - log.exception(f'fork #2 failed: {err}') + log.exception(f"fork #2 failed: {err}") sys.exit(1) # redirect standard file descriptors sys.stdout.flush() sys.stderr.flush() - si = open(os.devnull, 'r') - so = open(os.devnull, 'a+') - se = open(os.devnull, 'a+') + si = open(os.devnull, "r") + so = open(os.devnull, "a+") + se = open(os.devnull, "a+") os.dup2(si.fileno(), sys.stdin.fileno()) os.dup2(so.fileno(), sys.stdout.fileno()) @@ -67,20 +65,19 @@ def daemonize(self): pid = str(os.getpid()) log.info(f"creating pidfile {self.pidfile} - containing {pid}") - with open(self.pidfile, 'w+') as f: - f.write(pid + '\n') + with open(self.pidfile, "w+") as f: + f.write(pid + "\n") def delpid(self): log.info("atexit -- service run completed -- removing pidfile") os.remove(self.pidfile) def start(self): - """Start the daemon. - """ + """Start the daemon.""" # Check for a pidfile to see if the daemon already runs try: - with open(self.pidfile, 'r') as pf: + with open(self.pidfile, "r") as pf: pid = int(pf.read().strip()) except IOError: pid = None @@ -95,25 +92,26 @@ def start(self): self.run() def stop(self): - """Stop the daemon. - """ + """Stop the daemon.""" # Get the pid from the pidfile try: - with open(self.pidfile, 'r') as pf: + with open(self.pidfile, "r") as pf: pid = int(pf.read().strip()) except IOError: pid = None if not pid: - message = f"pidfile {self.pidfile} does not exist. Daemon not running?\n" + message = ( + f"pidfile {self.pidfile} does not exist. Daemon not running?\n" + ) sys.stderr.write(message) return # not an error in a restart # Try killing the daemon process try: - while 1: + while True: os.kill(pid, signal.SIGTERM) time.sleep(0.1) except OSError as err: @@ -126,8 +124,7 @@ def stop(self): sys.exit(1) def restart(self): - """Restart the daemon. - """ + """Restart the daemon.""" self.stop() self.start() @@ -139,10 +136,12 @@ def run(self): start() or restart(). 
""" - CMDS = ['start', 'stop', 'restart', 'run'] + CMDS = ["start", "stop", "restart", "run"] def _usage(self): - print(f"run this daemon script with one argument == {'|'.join(Daemon.CMDS)}") + print( + f"run this daemon script with one argument == {'|'.join(Daemon.CMDS)}" + ) def _cmd(self, argv): if len(argv) != 2: diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py index fef4e03..f785e35 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py +++ b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py @@ -1,30 +1,36 @@ # this file will be used to trigger the injest processes into graphdb -# this prevents the injest.py and tryout_watch.py to be run at the same time with conflicting circular imports +# this prevents the injest.py and tryout_watch.py to be run at the same +# time with conflicting circular imports from SPARQLWrapper import SPARQLWrapper, JSON +from datetime import datetime from pathlib import Path import logging from rdflib import Graph import time import os -#from dotenv import load_dotenv -from .helpers import resolve_path #,enable_logging + +# from dotenv import load_dotenv +from .helpers import resolve_path # ,enable_logging from pyrdfj2 import J2RDFSyntaxBuilder log = logging.getLogger(__name__) URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") + def data_path_from_config(): local_default = str(resolve_path("./data", versus="dotenv")) folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) return Path(folder_name).absolute() + def gdb_from_config(): base = os.getenv("GDB_BASE", "http://localhost:7200") repoid = os.getenv("GDB_REPO", "lwua23") endpoint = f"{ base }/repositories/{ repoid }" - updateEndpoint = endpoint + "/statements" # update statements are handled at other endpoint + # update statements are handled at other endpoint + updateEndpoint = endpoint + "/statements" log.debug(f"using endpoint {endpoint}") @@ -32,19 +38,23 @@ def gdb_from_config(): endpoint=endpoint, updateEndpoint=updateEndpoint, returnFormat=JSON, - agent="lwua-python-sparql-client" + agent="lwua-python-sparql-client", ) - gdb.method = 'POST' + gdb.method = "POST" return gdb + def get_j2rdf_builder(): template_folder = resolve_path("./lwua/templates", versus="dotenv") - #log.info(f"template_folder == {template_folder}") - #init J2RDFSyntaxBuilder + # log.info(f"template_folder == {template_folder}") + # init J2RDFSyntaxBuilder j2rdf = J2RDFSyntaxBuilder(templates_folder=template_folder) return j2rdf -def update_context_admin(context: str, gdb: SPARQLWrapper = None, lastmod: str= None): + +def update_context_admin( + context: str, gdb: SPARQLWrapper = None, lastmod: str = None +): """Update the last modified time of a context in the admin graph Args: @@ -54,30 +64,31 @@ def update_context_admin(context: str, gdb: SPARQLWrapper = None, lastmod: str= """ log.info(f"update_context_admin on {context}") j2rdf = get_j2rdf_builder() - + # check if context is IRI compliant context = check_iri_compliance(context) - - #variables for the template + + # variables for the template template = "update.sparql" vars = { "admin_graph": admin_context(), "context": context, - "lastmod": lastmod + "lastmod": lastmod, } - #get the sparql query + # get the sparql query query = j2rdf.build_syntax(template, **vars) - #log.debug(f"update_context_admin query == {query}") - #execute the query + # log.debug(f"update_context_admin query == {query}") + # execute the query gdb.setQuery(query) gdb.query() - + + def check_iri_compliance(context: str): 
if URN_BASE not in context: context = named_context(context) return context - + def insert_data(graph: Graph, context: str = None, gdb: SPARQLWrapper = None): """Insert data into a context in the graph database @@ -86,34 +97,32 @@ def insert_data(graph: Graph, context: str = None, gdb: SPARQLWrapper = None): context (str): The context to insert data into gdb (SPARQLWrapper): The SPARQLWrapper to post to """ - + log.info(f"insert_data into {context}") - + # Get the SPARQLWrapper gdb = gdb_from_config() if gdb is None else gdb - + # Initialize the J2RDFSyntaxBuilder j2rdf = get_j2rdf_builder() - - #check if context is IRI compliant if context is not None + + # check if context is IRI compliant if context is not None context = check_iri_compliance(context) if context is not None else None # Variables for the template template = "insert_data.sparql" ntstr = graph.serialize(format="nt") - vars = { - "context": context, - "data": ntstr - } + vars = {"context": context, "data": ntstr} # Get the SPARQL query query = j2rdf.build_syntax(template, **vars) - #log.debug(f"insert_data query == {query}") - + # log.debug(f"insert_data query == {query}") + # Execute the query gdb.setQuery(query) gdb.query() - + + def delete_data(context: str = None, gdb: SPARQLWrapper = None): """Delete data from a context in the graph database @@ -121,23 +130,21 @@ def delete_data(context: str = None, gdb: SPARQLWrapper = None): context (str): The context to delete data from (if None, delete all data) gdb (SPARQLWrapper): The SPARQLWrapper to post to """ - + log.info(f"delete_data on {context}") - + # Get the SPARQLWrapper gdb = gdb_from_config() if gdb is None else gdb - + # Initialize the J2RDFSyntaxBuilder j2rdf = get_j2rdf_builder() - - #check if context is IRI compliant + + # check if context is IRI compliant context = check_iri_compliance(context) if context is not None else None # Variables for the template template = "delete_data.sparql" - vars = { - "context": context - } + vars = {"context": context} # Get the SPARQL query query = j2rdf.build_syntax(template, **vars) @@ -145,7 +152,8 @@ def delete_data(context: str = None, gdb: SPARQLWrapper = None): # Execute the query gdb.setQuery(query) gdb.query() - + + def ingest_graph(graph: Graph, context: str = None, replace: bool = False): log.debug(f"to insert data into <{ context }>") gdb = gdb_from_config() @@ -153,52 +161,59 @@ def ingest_graph(graph: Graph, context: str = None, replace: bool = False): if replace and context is not None: log.debug(f"deleting <{ context }> before insert") delete_data(context, gdb) - + # insert the data insert_data(graph, context, gdb) - - #get the time + + # get the time c_time = time.time() - update_context_admin(context, gdb, c_time) + # convert the epoch timestamp to a date string + date_string = datetime.utcfromtimestamp(c_time).isoformat() + update_context_admin(context, gdb, date_string) + def named_context(name: str, base: str = URN_BASE): - return f"{base}:{name}" # TODO maybe consider something else? + return f"{base}:{name}" # TODO maybe consider something else? 
+ def fname_2_context(fname: str): - #return named_context(f"data/{fname}") # /data prefix is not needed until we have multiple data folders to ingest from + # return named_context(f"data/{fname}") # /data prefix is not needed until + # we have multiple data folders to ingest from return named_context(fname) + def admin_context(): return named_context("ADMIN") + def get_admin_graph(gdb: SPARQLWrapper = None): - log.info(f"get_admin_graph") - + if gdb is None: gdb = gdb_from_config() - + j2rdf = get_j2rdf_builder() template = "get_admin.sparql" - vars = { - "admin_context": admin_context() - } + vars = {"admin_context": admin_context()} query = j2rdf.build_syntax(template, **vars) - #log.debug(f"get_admin_graph query == {query}") + # log.debug(f"get_admin_graph query == {query}") gdb.setQuery(query) gdb.setReturnFormat(JSON) results = gdb.query().convert() return results + def delete_all_graphs(gdb): delete_data(None, gdb) -def delete_graph(context: str,gdb: SPARQLWrapper= None): + +def delete_graph(context: str, gdb: SPARQLWrapper = None): if gdb is None: gdb = gdb_from_config() delete_data(context, gdb) update_context_admin(context, gdb, None) + def suffix_2_format(suffix): if suffix in ["ttl", "turtle"]: return "turtle" @@ -207,16 +222,19 @@ def suffix_2_format(suffix): # todo consider others if needed return None + def read_graph(fpath: Path, format: str = None): format = format or suffix_2_format(fpath.suffix) graph: Graph = Graph().parse(location=str(fpath), format=format) return graph + def delete_data_file(fname): context = fname_2_context(fname) log.info(f"deleting {fname} from {context}") delete_graph(context) + def ingest_data_file(fname, replace: bool = False): file_path = data_path_from_config() / fname assert file_path.exists(), f"cannot ingest file at {file_path}" @@ -224,4 +242,5 @@ def ingest_data_file(fname, replace: bool = False): context = fname_2_context(fname) log.info(f"ingesting {file_path} into {context} | replace : {replace}") ingest_graph(graph, context=context, replace=replace) - # TODO maintain metadata triples last-ingest / last-modified of ingested file in some admin graph context + # TODO maintain metadata triples last-ingest / last-modified of ingested + # file in some admin graph context diff --git a/docker/lwua-ingest/lwua-py/lwua/helpers.py b/docker/lwua-ingest/lwua-py/lwua/helpers.py index 93e7575..d6bf75b 100644 --- a/docker/lwua-ingest/lwua-py/lwua/helpers.py +++ b/docker/lwua-ingest/lwua-py/lwua/helpers.py @@ -15,7 +15,7 @@ def yaml_load_file(file): return None # else try: - with open(file, 'r') as yml_file: + with open(file, "r") as yml_file: return yaml.load(yml_file, Loader=yaml.SafeLoader) except Exception as e: log.exception(e) @@ -23,20 +23,21 @@ def yaml_load_file(file): def find_logconf(logconf): - if logconf is None or logconf == '': + if logconf is None or logconf == "": return None for vs in ["dotenv", "module", "work"]: # try in this order logconf_path = resolve_path(logconf, versus=vs) print(f"trying vs {vs} --> {logconf_path} ?") - if (logconf_path.exists()): + if logconf_path.exists(): return logconf_path # else - raise Exception(f"config error logconf file {logconf} not found relative to dotenv, module or pwd") + raise Exception( + f"config error logconf file {logconf} not found relative to dotenv, module or pwd" + ) def enable_logging(logconf: str = None): - """Configures logging based on logconf specified through .env ${LOGCONF} - """ + """Configures logging based on logconf specified through .env ${LOGCONF}""" logconf = 
os.getenv("LOGCONF") if logconf is None else logconf logconf_path = find_logconf(logconf) if logconf_path is None: @@ -49,14 +50,14 @@ def enable_logging(logconf: str = None): def singleton(class_): - """ Decorator for singleton classes - """ + """Decorator for singleton classes""" instances = {} def getinstance(*args, **kwargs): if class_ not in instances: instances[class_] = class_(*args, **kwargs) return instances[class_] + return getinstance @@ -70,7 +71,9 @@ def getinstance(*args, **kwargs): def resolve_path(location: str, versus: str = "module"): location = location if location else "" - assert versus in LOCATIONS, f"no base path available for coded versus = '{versus}'" + assert ( + versus in LOCATIONS + ), f"no base path available for coded versus = '{versus}'" base: Path = LOCATIONS[versus] log.debug(f"resolve path base='{base}' + rel='{location}'") return Path(base, location).absolute() diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 73ac954..edba625 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -1,6 +1,6 @@ import logging from dotenv import load_dotenv -from .helpers import enable_logging #, resolve_path +from .helpers import enable_logging # , resolve_path from .watcher import Watcher from .graph_functions import ingest_data_file, data_path_from_config @@ -12,16 +12,21 @@ def run_ingest(): data_path = data_path_from_config() log.info(f"run_ingest on updated files in {data_path}") - #init watcher on data_path + # init watcher on data_path w = Watcher(data_path) w.run() + # Note: this main method allows to locally test outside docker -# directly connecting to a localhost graphdb endpoint (which might be inside docker itself) +# directly connecting to a localhost graphdb endpoint (which might be +# inside docker itself) + + def main(): load_dotenv() enable_logging() ingest_data_file("project.ttl") -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/docker/lwua-ingest/lwua-py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py index 70b3e12..532d8a8 100644 --- a/docker/lwua-ingest/lwua-py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -17,14 +17,13 @@ def main_schedule(): # https://apscheduler.readthedocs.io/en/3.x/userguide.html class LWUAScheduler(BlockingScheduler): - def __init__(self, run_on_start: bool = True): # todo consider injecting interval through .env timeprops: dict = dict(minutes=30) # timeprops: dict = dict(seconds=5) super().__init__() self._run_on_start = run_on_start - self.add_job(lambda: main_schedule(), 'interval', **timeprops) + self.add_job(lambda: main_schedule(), "interval", **timeprops) def start(self): try: diff --git a/docker/lwua-ingest/lwua-py/lwua/watcher.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py index 3235ff8..9ed4ded 100644 --- a/docker/lwua-ingest/lwua-py/lwua/watcher.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -4,21 +4,36 @@ from dotenv import load_dotenv import logging from lwua.helpers import enable_logging, resolve_path -from lwua.graph_functions import ingest_data_file, delete_data_file, get_admin_graph +from lwua.graph_functions import ( + ingest_data_file, + delete_data_file, + get_admin_graph, +) log = logging.getLogger(__name__) URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") + class Watcher: def __init__(self, directory_to_watch): - self.directory_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", 
os.path.join("/root/graphdb-import",directory_to_watch)), "dotenv").absolute() + self.directory_to_watch = resolve_path( + os.getenv( + "GDB_DATA_FOLDER", + os.path.join("/root/graphdb-import", directory_to_watch), + ), + "dotenv", + ).absolute() log.info(f"env pointing to { self.directory_to_watch }") self.files = self.get_all_files() self.first_loop = True - + def get_all_files(self): - return {f: [os.path.getmtime(f)] for f in Path(self.directory_to_watch).glob('**/*') if f.is_file()} + return { + f: [os.path.getmtime(f)] + for f in Path(self.directory_to_watch).glob("**/*") + if f.is_file() + } def observe(self): new_files = self.get_all_files() @@ -29,7 +44,7 @@ def observe(self): if f in self.files and new_files[f] != self.files[f]: self.files[f] = new_files[f] modified[f] = self.files[f] - + self.files = new_files return added, deleted, modified @@ -38,40 +53,57 @@ def run(self): while True: if self.first_loop: log.info("First time loop") - #try and get the graph , if this fails due to the graphdb server not being up yet, the watcher will try again in 5 seconds untill it succeeds + # try and get the graph , if this fails due to the graphdb + # server not being up yet, the watcher will try again in 5 + # seconds untill it succeeds get_admin = True while get_admin: try: - #get admin graph + # get admin graph admin_graph = get_admin_graph() - info_admin = [(g['g']['value'].replace(f"{URN_BASE}:", ''), g['m']['value']) for g in admin_graph['results']['bindings']] + info_admin = [ + ( + g["g"]["value"].replace( + f"{URN_BASE}:", "" + ), + g["m"]["value"], + ) + for g in admin_graph["results"]["bindings"] + ] all_files = self.get_all_files() - #compare the files to the admin graph + # compare the files to the admin graph # if filename is not in the admin graph, add it # if filename is in the admin graph, check if the last modified time is the same, if not update the graph - # if filename is in the admin graph, but not in the files, delete the graph + # if filename is in the admin graph, but not in the + # files, delete the graph info_admin_dict = {g[0]: g[1] for g in info_admin} - + for g in info_admin: if g[0] not in all_files: - log.info(f"File {g[0]} has been deleted since downtime, deleting graph") + log.info( + f"File {g[0]} has been deleted since downtime, deleting graph" + ) delete_data_file(g[0]) - + for f in all_files: if f in info_admin_dict: # !TODO: check variables since the modified now just deletes the graph :/ if info_admin_dict[f] < all_files[f][0]: - log.info(f"File {f} has been modified since downtime, updating graph") + log.info( + f"File {f} has been modified since downtime, updating graph" + ) ingest_data_file(f, True) continue - log.info(f"File {f} has been added since downtime, adding graph") + log.info( + f"File {f} has been added since downtime, adding graph" + ) ingest_data_file(f) - + except Exception as e: log.error(f"error: {e}") time.sleep(1) continue - + get_admin = False self.first_loop = False @@ -86,16 +118,19 @@ def run(self): for f in modified: log.info(f"File {f} has been modified") ingest_data_file(f, True) - + time.sleep(5) except KeyboardInterrupt: - log.info("Stopping watcher") + log.info("Stopping watcher") + -## test the watcher on local file system - not in docker -if __name__ == '__main__': +# test the watcher on local file system - not in docker +if __name__ == "__main__": load_dotenv() enable_logging() - file_to_watch = resolve_path(os.getenv("GDB_DATA_FOLDER", "/root/graphdb-import/data"), "dotenv").absolute() + file_to_watch = resolve_path( 
+ os.getenv("GDB_DATA_FOLDER", "/root/graphdb-import/data"), "dotenv"
+ ).absolute()
 log.info(f"env pointing to { file_to_watch }")
 w = Watcher(file_to_watch)
- w.run()
\ No newline at end of file
+ w.run()

From fd4879d8e8580b615c7f5e70e0629580edd631b7 Mon Sep 17 00:00:00 2001
From: cedricdcc <30471340+cedricdcc@users.noreply.github.com>
Date: Tue, 21 Nov 2023 12:58:43 +0100
Subject: [PATCH 34/60] refactoring of watcher.py, edited templates and
 graphdb.py functions so that they are clearer about their purpose.

---
 data/project.ttl | 2 +-
 .../lwua-py/lwua/graph_functions.py | 246 ---------------
 docker/lwua-ingest/lwua-py/lwua/graphdb.py | 289 ++++++++++++++++++
 docker/lwua-ingest/lwua-py/lwua/helpers.py | 6 +
 docker/lwua-ingest/lwua-py/lwua/ingest.py | 33 +-
 ...delete_data.sparql => delete_graph.sparql} | 9 +-
 .../lwua-py/lwua/templates/get_admin.sparql | 9 -
 ...insert_data.sparql => insert_graph.sparql} | 6 +-
 .../lwua/templates/lastmod_info.sparql | 9 +
 ...e.sparql => update_context_lastmod.sparql} | 8 +-
 docker/lwua-ingest/lwua-py/lwua/watcher.py | 170 ++++------
 11 files changed, 399 insertions(+), 388 deletions(-)
 delete mode 100644 docker/lwua-ingest/lwua-py/lwua/graph_functions.py
 create mode 100644 docker/lwua-ingest/lwua-py/lwua/graphdb.py
 rename docker/lwua-ingest/lwua-py/lwua/templates/{delete_data.sparql => delete_graph.sparql} (80%)
 delete mode 100644 docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql
 rename docker/lwua-ingest/lwua-py/lwua/templates/{insert_data.sparql => insert_graph.sparql} (61%)
 create mode 100644 docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql
 rename docker/lwua-ingest/lwua-py/lwua/templates/{update.sparql => update_context_lastmod.sparql} (69%)

diff --git a/data/project.ttl b/data/project.ttl
index bf11489..8ba941a 100644
--- a/data/project.ttl
+++ b/data/project.ttl
@@ -13,7 +13,7 @@
 schema:creator .

 a schema:Person ;
- schema:name "Laurian van Maldghem"^^xsd:string .
+ schema:name "Laurian van Maldeghem"^^xsd:string .

 a schema:Person ;
 schema:name "Marc Portier"^^xsd:string .
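
The core of patch 30 and of this patch is that no SPARQL strings are assembled in Python anymore: the .sparql Jinja2 templates are loaded once through pyrdfj2's J2RDFSyntaxBuilder and rendered per call. Below is a minimal illustrative sketch of that flow, not part of the patch itself. The constructor and build_syntax() calls mirror the patch code above; the template folder path, the example context IRI, and the expected output are assumptions (delete_graph.sparql is assumed to keep the body of the delete_data.sparql it renames).

    # illustrative sketch only; paths and values are assumptions, not repo code
    from pyrdfj2 import J2RDFSyntaxBuilder

    # load the folder of .sparql Jinja2 templates once (mirrors get_j2rdf_builder)
    j2rdf = J2RDFSyntaxBuilder(templates_folder="./lwua/templates")

    # render the delete template for one named-graph context;
    # template variables are passed as keyword arguments to build_syntax()
    query = j2rdf.build_syntax(
        "delete_graph.sparql",
        context="urn:lwua:INGEST:project.ttl",
    )
    # given the delete_data.sparql body shown in PATCH 30, this should render to:
    # DELETE WHERE { GRAPH <urn:lwua:INGEST:project.ttl> { ?s ?p ?o } }

The rendered string is then handed to the SPARQLWrapper via gdb.setQuery(query) and gdb.query(), which is exactly what the graph database helpers in this series do for insert_graph.sparql and update_context_lastmod.sparql as well.
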
diff --git a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py b/docker/lwua-ingest/lwua-py/lwua/graph_functions.py deleted file mode 100644 index f785e35..0000000 --- a/docker/lwua-ingest/lwua-py/lwua/graph_functions.py +++ /dev/null @@ -1,246 +0,0 @@ -# this file will be used to trigger the injest processes into graphdb -# this prevents the injest.py and tryout_watch.py to be run at the same -# time with conflicting circular imports - -from SPARQLWrapper import SPARQLWrapper, JSON -from datetime import datetime -from pathlib import Path -import logging -from rdflib import Graph -import time -import os - -# from dotenv import load_dotenv -from .helpers import resolve_path # ,enable_logging -from pyrdfj2 import J2RDFSyntaxBuilder - -log = logging.getLogger(__name__) -URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") - - -def data_path_from_config(): - local_default = str(resolve_path("./data", versus="dotenv")) - folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) - return Path(folder_name).absolute() - - -def gdb_from_config(): - base = os.getenv("GDB_BASE", "http://localhost:7200") - repoid = os.getenv("GDB_REPO", "lwua23") - - endpoint = f"{ base }/repositories/{ repoid }" - # update statements are handled at other endpoint - updateEndpoint = endpoint + "/statements" - - log.debug(f"using endpoint {endpoint}") - - gdb = SPARQLWrapper( - endpoint=endpoint, - updateEndpoint=updateEndpoint, - returnFormat=JSON, - agent="lwua-python-sparql-client", - ) - gdb.method = "POST" - return gdb - - -def get_j2rdf_builder(): - template_folder = resolve_path("./lwua/templates", versus="dotenv") - # log.info(f"template_folder == {template_folder}") - # init J2RDFSyntaxBuilder - j2rdf = J2RDFSyntaxBuilder(templates_folder=template_folder) - return j2rdf - - -def update_context_admin( - context: str, gdb: SPARQLWrapper = None, lastmod: str = None -): - """Update the last modified time of a context in the admin graph - - Args: - context (str): The context to update the last modified time of - gdb (SPARQLWrapper, optional): the SPARQLWrapper to post to. Defaults to None. - lastmod (str, optional): epoch time. Defaults to None. 
- """ - log.info(f"update_context_admin on {context}") - j2rdf = get_j2rdf_builder() - - # check if context is IRI compliant - context = check_iri_compliance(context) - - # variables for the template - template = "update.sparql" - vars = { - "admin_graph": admin_context(), - "context": context, - "lastmod": lastmod, - } - # get the sparql query - query = j2rdf.build_syntax(template, **vars) - # log.debug(f"update_context_admin query == {query}") - # execute the query - gdb.setQuery(query) - gdb.query() - - -def check_iri_compliance(context: str): - if URN_BASE not in context: - context = named_context(context) - return context - - -def insert_data(graph: Graph, context: str = None, gdb: SPARQLWrapper = None): - """Insert data into a context in the graph database - - Args: - graph (Graph): The graph to insert data from - context (str): The context to insert data into - gdb (SPARQLWrapper): The SPARQLWrapper to post to - """ - - log.info(f"insert_data into {context}") - - # Get the SPARQLWrapper - gdb = gdb_from_config() if gdb is None else gdb - - # Initialize the J2RDFSyntaxBuilder - j2rdf = get_j2rdf_builder() - - # check if context is IRI compliant if context is not None - context = check_iri_compliance(context) if context is not None else None - - # Variables for the template - template = "insert_data.sparql" - ntstr = graph.serialize(format="nt") - vars = {"context": context, "data": ntstr} - - # Get the SPARQL query - query = j2rdf.build_syntax(template, **vars) - # log.debug(f"insert_data query == {query}") - - # Execute the query - gdb.setQuery(query) - gdb.query() - - -def delete_data(context: str = None, gdb: SPARQLWrapper = None): - """Delete data from a context in the graph database - - Args: - context (str): The context to delete data from (if None, delete all data) - gdb (SPARQLWrapper): The SPARQLWrapper to post to - """ - - log.info(f"delete_data on {context}") - - # Get the SPARQLWrapper - gdb = gdb_from_config() if gdb is None else gdb - - # Initialize the J2RDFSyntaxBuilder - j2rdf = get_j2rdf_builder() - - # check if context is IRI compliant - context = check_iri_compliance(context) if context is not None else None - - # Variables for the template - template = "delete_data.sparql" - vars = {"context": context} - - # Get the SPARQL query - query = j2rdf.build_syntax(template, **vars) - - # Execute the query - gdb.setQuery(query) - gdb.query() - - -def ingest_graph(graph: Graph, context: str = None, replace: bool = False): - log.debug(f"to insert data into <{ context }>") - gdb = gdb_from_config() - # do the cleanup if possible - if replace and context is not None: - log.debug(f"deleting <{ context }> before insert") - delete_data(context, gdb) - - # insert the data - insert_data(graph, context, gdb) - - # get the time - c_time = time.time() - # convert the epoch timestamp to a date string - date_string = datetime.utcfromtimestamp(c_time).isoformat() - update_context_admin(context, gdb, date_string) - - -def named_context(name: str, base: str = URN_BASE): - return f"{base}:{name}" # TODO maybe consider something else? 
-
-
-def fname_2_context(fname: str):
-    # return named_context(f"data/{fname}") # /data prefix is not needed until
-    # we have multiple data folders to ingest from
-    return named_context(fname)
-
-
-def admin_context():
-    return named_context("ADMIN")
-
-
-def get_admin_graph(gdb: SPARQLWrapper = None):
-    log.info(f"get_admin_graph")
-
-    if gdb is None:
-        gdb = gdb_from_config()
-
-    j2rdf = get_j2rdf_builder()
-    template = "get_admin.sparql"
-    vars = {"admin_context": admin_context()}
-    query = j2rdf.build_syntax(template, **vars)
-    # log.debug(f"get_admin_graph query == {query}")
-    gdb.setQuery(query)
-    gdb.setReturnFormat(JSON)
-    results = gdb.query().convert()
-    return results
-
-
-def delete_all_graphs(gdb):
-    delete_data(None, gdb)
-
-
-def delete_graph(context: str, gdb: SPARQLWrapper = None):
-    if gdb is None:
-        gdb = gdb_from_config()
-    delete_data(context, gdb)
-    update_context_admin(context, gdb, None)
-
-
-def suffix_2_format(suffix):
-    if suffix in ["ttl", "turtle"]:
-        return "turtle"
-    if suffix in ["jsonld", "json"]:
-        return "json-ld"
-    # todo consider others if needed
-    return None
-
-
-def read_graph(fpath: Path, format: str = None):
-    format = format or suffix_2_format(fpath.suffix)
-    graph: Graph = Graph().parse(location=str(fpath), format=format)
-    return graph
-
-
-def delete_data_file(fname):
-    context = fname_2_context(fname)
-    log.info(f"deleting {fname} from {context}")
-    delete_graph(context)
-
-
-def ingest_data_file(fname, replace: bool = False):
-    file_path = data_path_from_config() / fname
-    assert file_path.exists(), f"cannot ingest file at {file_path}"
-    graph = read_graph(file_path)
-    context = fname_2_context(fname)
-    log.info(f"ingesting {file_path} into {context} | replace : {replace}")
-    ingest_graph(graph, context=context, replace=replace)
-    # TODO maintain metadata triples last-ingest / last-modified of ingested
-    # file in some admin graph context
diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py
new file mode 100644
index 0000000..5cb8ca7
--- /dev/null
+++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py
@@ -0,0 +1,289 @@
+# this file will be used to trigger the ingest processes into graphdb
+# this prevents ingest.py and tryout_watch.py from being run at the same
+# time with conflicting circular imports
+
+from SPARQLWrapper import SPARQLWrapper, JSON
+from datetime import datetime
+from pathlib import Path
+import logging
+from rdflib import Graph
+import time
+import os
+
+# from dotenv import load_dotenv
+from .helpers import resolve_path, singleton, data_path_from_config # ,enable_logging
+from pyrdfj2 import J2RDFSyntaxBuilder
+
+log = logging.getLogger(__name__)
+URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST")
+
+def gdb_from_config():
+    base = os.getenv("GDB_BASE", "http://localhost:7200")
+    repoid = os.getenv("GDB_REPO", "lwua23")
+
+    endpoint = f"{ base }/repositories/{ repoid }"
+    # update statements are handled at other endpoint
+    updateEndpoint = endpoint + "/statements"
+
+    log.debug(f"using endpoint {endpoint}")
+
+    gdb = SPARQLWrapper(
+        endpoint=endpoint,
+        updateEndpoint=updateEndpoint,
+        returnFormat=JSON,
+        agent="lwua-python-sparql-client",
+    )
+    gdb.method = "POST"
+    return gdb
+
+gdb = gdb_from_config()
+
+@singleton
+def get_j2rdf_builder():
+    template_folder = resolve_path("./lwua/templates")
+    log.info(f"template_folder == {template_folder}")
+    # init J2RDFSyntaxBuilder
+    j2rdf = J2RDFSyntaxBuilder(templates_folder=template_folder)
+    return j2rdf
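+
+# note: `singleton` is imported from .helpers but its implementation is not
+# shown in this patch; a minimal memoizing decorator along these lines would
+# fit (a sketch under that assumption, not the actual helper):
+#
+#     def singleton(fn):
+#         cache = []
+#         def wrapper(*args, **kwargs):
+#             if not cache:
+#                 cache.append(fn(*args, **kwargs))
+#             return cache[0]
+#         return wrapper
+#
+# net effect: get_j2rdf_builder() above constructs the J2RDFSyntaxBuilder
+# only once per process.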
+
+
+def update_registry_lastmod(
+    context: str, lastmod: str = None
+):
+    """
+    Update the last-modified administration of a context in the lastmod registry.
+
+    :param context: The context to update.
+    :type context: str
+    :param lastmod: The date string to update with; None removes the entry.
+    :type lastmod: str
+    """
+    log.info(f"update registry_of_lastmod for {context}")
+    j2rdf = get_j2rdf_builder()
+
+    # check if context is IRI compliant
+    assert_iri_compliance(context)
+
+    # variables for the template
+    template = "update_context_lastmod.sparql"
+    vars = {
+        "registry_of_lastmod_context": registry_of_lastmod_context(),
+        "context": context,
+        "lastmod": lastmod,
+    }
+    # get the sparql query
+    query = j2rdf.build_syntax(template, **vars)
+    # log.debug(f"update_registry_lastmod query == {query}")
+    # execute the query
+    gdb.setQuery(query)
+    gdb.query()
+
+
+def assert_iri_compliance(context: str):
+    assert URN_BASE in context, f"Context {context} is not IRI compliant"
+
+
+def insert_graph(graph: Graph, context: str = None):
+    """
+    Insert data into a context.
+
+    :param graph: The graph to insert data from.
+    :type graph: Graph
+    :param context: The context to insert data into.
+    :type context: str
+    """
+
+    log.info(f"insert_graph into {context}")
+    assert_context_exists(context)
+    # Initialize the J2RDFSyntaxBuilder
+    j2rdf = get_j2rdf_builder()
+
+    # check if context is IRI compliant
+    if context is not None:
+        assert_iri_compliance(context)
+
+    # Variables for the template
+    template = "insert_graph.sparql"
+    ntstr = graph.serialize(format="nt")
+    vars = {"context": context, "raw_triples": ntstr}
+
+    # Get the SPARQL query
+    query = j2rdf.build_syntax(template, **vars)
+    # log.debug(f"insert_graph query == {query}")
+
+    # Execute the query
+    gdb.setQuery(query)
+    gdb.query()
+
+
+def assert_context_exists(context: str):
+    assert context is not None, "Context cannot be None"
+
+
+def delete_graph(context: str):
+    """
+    Delete data from a context.
+
+    :param context: The context to delete data from.
+    :type context: str
+    """
+
+    log.info(f"delete_graph on {context}")
+    assert_context_exists(context)
+
+    # Initialize the J2RDFSyntaxBuilder
+    j2rdf = get_j2rdf_builder()
+
+    # check if context is IRI compliant
+    if context is not None:
+        assert_iri_compliance(context)
+
+    # Variables for the template
+    template = "delete_graph.sparql"
+    vars = {"context": context}
+
+    # Get the SPARQL query
+    query = j2rdf.build_syntax(template, **vars)
+
+    # Execute the query
+    gdb.setQuery(query)
+    gdb.query()
+
+
+def ingest_graph(graph: Graph, lastmod, context: str, replace: bool = False):
+    """
+    Ingest a graph into a context, optionally replacing its previous content.
+
+    :param graph: The graph to ingest.
+    :type graph: Graph
+    :param lastmod: The epoch timestamp to record in the lastmod registry.
+    :param context: The context to ingest into.
+    :type context: str
+    :param replace: Whether to delete the context before inserting.
+    :type replace: bool
+    """
+    log.debug(f"to insert data into <{ context }>")
+    assert_context_exists(context)
+    # do the cleanup if possible
+    if replace and context is not None:
+        log.debug(f"deleting <{ context }> before insert")
+        delete_graph(context)
+
+    # insert the data
+    insert_graph(graph, context)
+
+    # convert the epoch timestamp to a date string
+    date_string = datetime.utcfromtimestamp(lastmod).isoformat()
+    update_registry_lastmod(context, date_string)
+
+
+def named_context(name: str, base: str = URN_BASE):
+    """
+    Create a named context.
+
+    :param name: The name of the context.
+    :type name: str
+    :param base: The base of the context. Defaults to URN_BASE.
+    :type base: str, optional
+    :return: The named context.
+    :rtype: str
+    """
+    return f"{base}:{name}"  # TODO maybe consider something else?
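+
+# worked example (comment only, not part of the module's API): with the
+# default URN_BASE these two helpers are inverses of each other:
+#   named_context("project.ttl") == "urn:lwua:INGEST:project.ttl"
+#   context_2_fname("urn:lwua:INGEST:project.ttl") == Path("project.ttl")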
+
+def context_2_fname(context: str):
+    """
+    Convert a context to a filename path.
+
+    :param context: The context to convert.
+    :type context: str
+    :return: The filename path corresponding to the context.
+    :rtype: Path
+    """
+    return Path(context.replace(f"{URN_BASE}:", ""))
+
+
+def fname_2_context(fname: str):
+    """
+    Convert a filename to a context.
+
+    :param fname: The filename to convert.
+    :type fname: str
+    :return: The context corresponding to the filename.
+    :rtype: str
+    """
+    return named_context(fname)
+
+def date_2_epoch(date: str):
+    """
+    Convert a date string to an epoch timestamp.
+
+    :param date: The date string to convert.
+    :type date: str
+    :return: The epoch timestamp corresponding to the date string.
+    :rtype: float
+    """
+    return datetime.fromisoformat(date).timestamp()
+
+
+def registry_of_lastmod_context():
+    return named_context("ADMIN")
+
+
+def get_registry_of_lastmod():
+    log.info("getting last modified registry")
+
+    j2rdf = get_j2rdf_builder()
+    template = "lastmod_info.sparql"
+    vars = {"context": registry_of_lastmod_context()}
+    query = j2rdf.build_syntax(template, **vars)
+    # log.debug(f"get_admin_graph query == {query}")
+    gdb.setQuery(query)
+    gdb.setReturnFormat(JSON)
+    results = gdb.query().convert()
+
+    # convert the SPARQL result {'head': {'vars': ['graph', 'lastmod']}, 'results': {'bindings': [...]}}
+    # into a dict {PosixPath(graph): epoch}: the URN prefix is stripped from the
+    # graph context and the datetime string is converted to an epoch timestamp
+
+    converted = {}
+    for g in results["results"]["bindings"]:
+        path = context_2_fname(g["graph"]["value"])
+        time = date_2_epoch(g["lastmod"]["value"])
+        converted[path] = time
+    return converted
+
+
+def suffix_2_format(suffix):
+    if suffix in ["ttl", "turtle"]:
+        return "turtle"
+    if suffix in ["jsonld", "json"]:
+        return "json-ld"
+    # todo consider others if needed
+    return None
+
+
+def read_graph(fpath: Path, format: str = None):
+    format = format or suffix_2_format(fpath.suffix)
+    graph: Graph = Graph().parse(location=str(fpath), format=format)
+    return graph
+
+
+def delete_data_file(fname):
+    context = fname_2_context(fname)
+    log.info(f"deleting {fname} from {context}")
+    assert_context_exists(context)
+    delete_graph(context)
+    update_registry_lastmod(context, None)
+
+
+def ingest_data_file(fname, lastmod, replace: bool = True):
+    """
+    Ingest a data file.
+
+    :param fname: The name of the file to ingest.
+    :type fname: str
+    :param lastmod: The last modification time (epoch) of the file.
+    :param replace: Whether to replace the existing data. Defaults to True.
+    :type replace: bool
+    :raises AssertionError: If the file does not exist.
+    """
+    file_path = data_path_from_config() / fname
+    assert file_path.exists(), f"cannot ingest file at {file_path}"
+    graph = read_graph(file_path)
+    context = fname_2_context(fname)
+    log.info(f"ingesting {file_path} into {context} | replace : {replace}")
+    ingest_graph(graph, lastmod, context=context, replace=replace)
+    # TODO maintain metadata triples last-ingest / last-modified of ingested
+    # file in some admin graph context
diff --git a/docker/lwua-ingest/lwua-py/lwua/helpers.py b/docker/lwua-ingest/lwua-py/lwua/helpers.py
index d6bf75b..9442438 100644
--- a/docker/lwua-ingest/lwua-py/lwua/helpers.py
+++ b/docker/lwua-ingest/lwua-py/lwua/helpers.py
@@ -77,3 +77,9 @@ def resolve_path(location: str, versus: str = "module"):
     base: Path = LOCATIONS[versus]
     log.debug(f"resolve path base='{base}' + rel='{location}'")
     return Path(base, location).absolute()
+
+
+def data_path_from_config():
+    local_default = str(resolve_path("./data", versus="dotenv"))
+    folder_name = os.getenv("INGEST_DATA_FOLDER", local_default)
+    return Path(folder_name).absolute()
\ No newline at end of file
diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py
index edba625..d429bf8 100644
--- a/docker/lwua-ingest/lwua-py/lwua/ingest.py
+++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py
@@ -1,20 +1,37 @@
 import logging
+import time
 from dotenv import load_dotenv
-from .helpers import enable_logging # , resolve_path
-from .watcher import Watcher
-from .graph_functions import ingest_data_file, data_path_from_config
+from SPARQLWrapper import SPARQLWrapper, JSON
+from pathlib import Path
+from .helpers import enable_logging, data_path_from_config
+from .watcher import FolderChangeDetector, IngestChangeObserver
+from .graphdb import ingest_data_file, get_registry_of_lastmod

 log = logging.getLogger(__name__)

-URN_BASE = "urn:lwua:INGEST"

-
 def run_ingest():
     data_path = data_path_from_config()
     log.info(f"run_ingest on updated files in {data_path}")
-    # init watcher on data_path
-    w = Watcher(data_path)
-    w.run()
+
+    # fetch the last known context modification dates from the lastmod registry,
+    # retrying until the graphdb server answers; then poll the data folder in an
+    # endless loop with a 5 second sleep between passes
+    detector = FolderChangeDetector(data_path)
+    ingestor = IngestChangeObserver()
+    last_mod = None
+    while last_mod is None:
+        try:
+            last_mod = get_registry_of_lastmod()
+            log.info(f"initial last mod == {last_mod}")
+        except Exception as e:
+            log.exception(e)
+            time.sleep(2)
+
+    while True:
+        log.info("reporting changes")
+        last_mod = detector.report_changes(last_mod, ingestor)
+        log.info(f"last_mod == {last_mod}")
+        time.sleep(5)


 # Note: this main method allows to locally test outside docker
diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/delete_data.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/delete_graph.sparql
similarity index 80%
rename from docker/lwua-ingest/lwua-py/lwua/templates/delete_data.sparql
rename to docker/lwua-ingest/lwua-py/lwua/templates/delete_graph.sparql
index c0afe36..d487b9f 100644
--- a/docker/lwua-ingest/lwua-py/lwua/templates/delete_data.sparql
+++ b/docker/lwua-ingest/lwua-py/lwua/templates/delete_graph.sparql
@@ -3,11 +3,8 @@
  This template takes 1 parameter:
  - context: the context from which the data is to be deleted. If none is given, all data is deleted.
#} - +{% if context %} DELETE WHERE { - {% if context %} GRAPH <{{ context }}> { ?s ?p ?o } - {% else %} - ?s ?p ?o - {% endif %} -} \ No newline at end of file +} +{% endif %} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql deleted file mode 100644 index 7cb2cd9..0000000 --- a/docker/lwua-ingest/lwua-py/lwua/templates/get_admin.sparql +++ /dev/null @@ -1,9 +0,0 @@ -{# - This template is used to generate SPARQL SELECT queries. - This template takes 1 parameter: - - admin_context: the context from which the data is to be selected -#} - -SELECT ?g ?m WHERE { - GRAPH <{{ admin_context }}> { ?g ?m } -} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/insert_data.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/insert_graph.sparql similarity index 61% rename from docker/lwua-ingest/lwua-py/lwua/templates/insert_data.sparql rename to docker/lwua-ingest/lwua-py/lwua/templates/insert_graph.sparql index 2749b8a..12353e1 100644 --- a/docker/lwua-ingest/lwua-py/lwua/templates/insert_data.sparql +++ b/docker/lwua-ingest/lwua-py/lwua/templates/insert_graph.sparql @@ -1,12 +1,12 @@ {# This template is used to generate SPARQL INSERT DATA queries. The template takes two parameters: - - data: the data to be inserted + - raw_triples: the data to be inserted - context: the context in which the data should be inserted #} {% if context %} -INSERT DATA { GRAPH <{{ context }}> { {{ data }} } } +INSERT DATA { GRAPH <{{ context }}> { {{ raw_triples }} } } {% else %} -INSERT DATA { {{ data }} } +INSERT DATA { {{ raw_triples }} } {% endif %} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql new file mode 100644 index 0000000..cd01ea6 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql @@ -0,0 +1,9 @@ +{# + This template is used to generate SPARQL SELECT queries. + This template takes 1 parameter: + - context: the context from which the data is to be selected +#} + +SELECT ?graph ?lastmod WHERE { + GRAPH <{{ context }}> { ?graph ?lastmod } +} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/update.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql similarity index 69% rename from docker/lwua-ingest/lwua-py/lwua/templates/update.sparql rename to docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql index 036665b..ec8f3a6 100644 --- a/docker/lwua-ingest/lwua-py/lwua/templates/update.sparql +++ b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql @@ -2,7 +2,7 @@ This template generates a SPARQL query to update a context in an admin graph. Variables: - - admin_graph: The IRI of the admin graph. + - registry_of_lastmod_context: The IRI of the registry graph that will keep track of last modification dates. - context: The IRI of the context to update. - lastmod: The new modification date. If provided, the context will be inserted again with this date. @@ -10,20 +10,20 @@ PREFIX schema: DELETE { - GRAPH <{{ admin_graph }}> { + GRAPH <{{ registry_of_lastmod_context }}> { <{{ context }}> schema:dateModified ?date . } } INSERT { {% if lastmod %} - GRAPH <{{ admin_graph }}> { + GRAPH <{{ registry_of_lastmod_context }}> { <{{ context }}> schema:dateModified "{{ lastmod }}"^^xsd:dateTime . 
} {% endif %} } WHERE { OPTIONAL { - GRAPH <{{ admin_graph }}> { + GRAPH <{{ registry_of_lastmod_context }}> { <{{ context }}> schema:dateModified ?date . } } diff --git a/docker/lwua-ingest/lwua-py/lwua/watcher.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py index 9ed4ded..5e92caa 100644 --- a/docker/lwua-ingest/lwua-py/lwua/watcher.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -1,128 +1,76 @@ import time +import datetime import os from pathlib import Path from dotenv import load_dotenv +from abc import ABC, abstractmethod import logging from lwua.helpers import enable_logging, resolve_path -from lwua.graph_functions import ( +from lwua.graphdb import ( ingest_data_file, delete_data_file, - get_admin_graph, + get_registry_of_lastmod, ) log = logging.getLogger(__name__) -URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") +class FolderChangeObserver(ABC): + @abstractmethod + def added(self, fname: str, lastmod: datetime = None): + pass + + @abstractmethod + def removed(self, fname: str): + pass + + @abstractmethod + def changed(self, fname: str, lastmod: datetime = None): + pass + +class FolderChangeDetector: + def __init__(self, folder_to_inspect): + self.root = Path(folder_to_inspect) + while not self.root.exists(): + log.info(f"Waiting for {self.root} to exist") + time.sleep(1) + log.info(f"Watching {self.root}") + + def report_changes(self, known_lastmod_by_fname: dict = {}, observer: FolderChangeObserver = None): + current_lastmod_by_fname = {p: os.path.getmtime(p) for p in self.root.glob('**/*') if p.is_file()} + log.info(f"current_lastmod_by_fname: {current_lastmod_by_fname}") + for fname in known_lastmod_by_fname: + if fname not in current_lastmod_by_fname: + observer.removed(fname) + for fname, lastmod in current_lastmod_by_fname.items(): + if fname not in known_lastmod_by_fname: + log.info(f"new file {fname} with lastmod {lastmod}") + observer.added(fname, lastmod) + elif lastmod > known_lastmod_by_fname[fname]: + observer.changed(fname, lastmod) + + return current_lastmod_by_fname + +class IngestChangeObserver(FolderChangeObserver): + def __init__(self): + pass + + def removed(self, fname): + # Implement the deletion of graph context and update of lastmod registry + log.info(f"File {fname} has been deleted") + delete_data_file(fname) + + def added(self, fname, lastmod): + # Implement the addition of graph in context + log.info(f"File {fname} has been added") + ingest_data_file(fname, lastmod) + + def changed(self, fname, lastmod): + # Implement the replacement of graph in context and update the lastmod registry + log.info(f"File {fname} has been modified") + ingest_data_file(fname,lastmod, True) + -class Watcher: - def __init__(self, directory_to_watch): - self.directory_to_watch = resolve_path( - os.getenv( - "GDB_DATA_FOLDER", - os.path.join("/root/graphdb-import", directory_to_watch), - ), - "dotenv", - ).absolute() - log.info(f"env pointing to { self.directory_to_watch }") - self.files = self.get_all_files() - self.first_loop = True - - def get_all_files(self): - return { - f: [os.path.getmtime(f)] - for f in Path(self.directory_to_watch).glob("**/*") - if f.is_file() - } - - def observe(self): - new_files = self.get_all_files() - added = {f: new_files[f] for f in new_files if f not in self.files} - deleted = {f: self.files[f] for f in self.files if f not in new_files} - modified = {} - for f in new_files: - if f in self.files and new_files[f] != self.files[f]: - self.files[f] = new_files[f] - modified[f] = self.files[f] - - self.files = new_files - return added, 
deleted, modified - - def run(self): - try: - while True: - if self.first_loop: - log.info("First time loop") - # try and get the graph , if this fails due to the graphdb - # server not being up yet, the watcher will try again in 5 - # seconds untill it succeeds - get_admin = True - while get_admin: - try: - # get admin graph - admin_graph = get_admin_graph() - info_admin = [ - ( - g["g"]["value"].replace( - f"{URN_BASE}:", "" - ), - g["m"]["value"], - ) - for g in admin_graph["results"]["bindings"] - ] - all_files = self.get_all_files() - # compare the files to the admin graph - # if filename is not in the admin graph, add it - # if filename is in the admin graph, check if the last modified time is the same, if not update the graph - # if filename is in the admin graph, but not in the - # files, delete the graph - info_admin_dict = {g[0]: g[1] for g in info_admin} - - for g in info_admin: - if g[0] not in all_files: - log.info( - f"File {g[0]} has been deleted since downtime, deleting graph" - ) - delete_data_file(g[0]) - - for f in all_files: - if f in info_admin_dict: - # !TODO: check variables since the modified now just deletes the graph :/ - if info_admin_dict[f] < all_files[f][0]: - log.info( - f"File {f} has been modified since downtime, updating graph" - ) - ingest_data_file(f, True) - continue - log.info( - f"File {f} has been added since downtime, adding graph" - ) - ingest_data_file(f) - - except Exception as e: - log.error(f"error: {e}") - time.sleep(1) - continue - - get_admin = False - self.first_loop = False - - log.info("Checking for updates") - added, deleted, modified = self.observe() - for f in added: - log.info(f"File {f} has been added ") - ingest_data_file(f) - for f in deleted: - log.info(f"File {f} has been deleted") - delete_data_file(f) - for f in modified: - log.info(f"File {f} has been modified") - ingest_data_file(f, True) - - time.sleep(5) - except KeyboardInterrupt: - log.info("Stopping watcher") - # test the watcher on local file system - not in docker if __name__ == "__main__": From 9f8cfdc73760b5816dd4a7a9c0e7d0c588fd740c Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:13:50 +0100 Subject: [PATCH 35/60] beginning of tests --- .../lwua-py/lwua/tests/all_tests.py | 28 ++ .../lwua-py/lwua/tests/test_graphdb.py | 44 ++ .../lwua-py/lwua/tests/test_ingest.py | 26 ++ docker/lwua-ingest/lwua-py/poetry.lock | 441 +++++++++++++++++- docker/lwua-ingest/lwua-py/pyproject.toml | 4 + 5 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 docker/lwua-ingest/lwua-py/lwua/tests/all_tests.py create mode 100644 docker/lwua-ingest/lwua-py/lwua/tests/test_graphdb.py create mode 100644 docker/lwua-ingest/lwua-py/lwua/tests/test_ingest.py diff --git a/docker/lwua-ingest/lwua-py/lwua/tests/all_tests.py b/docker/lwua-ingest/lwua-py/lwua/tests/all_tests.py new file mode 100644 index 0000000..d17f806 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/tests/all_tests.py @@ -0,0 +1,28 @@ +import unittest +import os +from testcontainers.compose import DockerCompose + +class DockerTestSuite(unittest.TestSuite): + def setUp(self): + self.docker_compose = DockerCompose("../../../../", "docker-compose.yml") + self.docker_compose.start() + + def tearDown(self): + self.docker_compose.stop() + + def run(self, result=None): + self.setUp() + super(DockerTestSuite, self).run(result) + self.tearDown() + +if __name__ == "__main__": + # Start test suite + loader = unittest.TestLoader() + current_dir_path = 
os.path.dirname(os.path.realpath(__file__)) + non_docker_suite = loader.discover(current_dir_path, pattern='test_*.py') + suite = DockerTestSuite(loader.discover(current_dir_path, pattern='docker_test_*.py')) + + # Run the tests + runner = unittest.TextTestRunner() + runner.run(suite) + runner.run(non_docker_suite) \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/tests/test_graphdb.py b/docker/lwua-ingest/lwua-py/lwua/tests/test_graphdb.py new file mode 100644 index 0000000..4bdd437 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/tests/test_graphdb.py @@ -0,0 +1,44 @@ +import unittest +from unittest.mock import patch, MagicMock +from pathlib import Path +import lwua.graphdb as graphdb + +class TestGraphDB(unittest.TestCase): + @patch('graphdb.suffix_2_format') + @patch('graphdb.Graph') + def test_read_graph(self, mock_graph, mock_suffix_2_format): + # Arrange + mock_suffix_2_format.return_value = 'xml' + mock_graph_instance = mock_graph.return_value + mock_graph_instance.parse.return_value = None + + # Act + result = graphdb.read_graph(Path('test.xml')) + + # Assert + mock_suffix_2_format.assert_called_once_with('.xml') + mock_graph.assert_called_once() + mock_graph_instance.parse.assert_called_once_with(location='test.xml', format='xml') + self.assertIsNone(result) + + @patch('graphdb.fname_2_context') + @patch('graphdb.log.info') + @patch('graphdb.assert_context_exists') + @patch('graphdb.delete_graph') + @patch('graphdb.update_registry_lastmod') + def test_delete_data_file(self, mock_update_registry_lastmod, mock_delete_graph, mock_assert_context_exists, mock_log_info, mock_fname_2_context): + # Arrange + mock_fname_2_context.return_value = 'context' + + # Act + graphdb.delete_data_file('test.xml') + + # Assert + mock_fname_2_context.assert_called_once_with('test.xml') + mock_log_info.assert_called_once() + mock_assert_context_exists.assert_called_once_with('context') + mock_delete_graph.assert_called_once_with('context') + mock_update_registry_lastmod.assert_called_once_with('context', None) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/tests/test_ingest.py b/docker/lwua-ingest/lwua-py/lwua/tests/test_ingest.py new file mode 100644 index 0000000..206d7bb --- /dev/null +++ b/docker/lwua-ingest/lwua-py/lwua/tests/test_ingest.py @@ -0,0 +1,26 @@ +import unittest +from unittest.mock import patch, MagicMock +from lwua.ingest import run_ingest + +class TestIngest(unittest.TestCase): + @patch('ingest.get_registry_of_lastmod') + @patch('ingest.FolderChangeDetector') + @patch('ingest.IngestChangeObserver') + def test_run_ingest(self, mock_observer, mock_detector, mock_lastmod): + # Arrange + mock_lastmod.return_value = None + mock_detector_instance = mock_detector.return_value + mock_detector_instance.report_changes.return_value = None + mock_observer_instance = mock_observer.return_value + + # Act + run_ingest() + + # Assert + mock_lastmod.assert_called_once() + mock_detector.assert_called_once() + mock_observer.assert_called_once() + mock_detector_instance.report_changes.assert_called() + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/poetry.lock b/docker/lwua-ingest/lwua-py/poetry.lock index 658b8b1..107077a 100644 --- a/docker/lwua-ingest/lwua-py/poetry.lock +++ b/docker/lwua-ingest/lwua-py/poetry.lock @@ -28,6 +28,198 @@ tornado = ["tornado (>=4.3)"] twisted = ["twisted"] zookeeper = ["kazoo"] +[[package]] +name = 
"certifi" +version = "2023.11.17" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, + {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + 
{file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "deprecation" +version = "2.1.0" +description = "A library to handle automated deprecations" +optional = false +python-versions = "*" +files = [ + {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"}, + {file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"}, +] + +[package.dependencies] +packaging = "*" + +[[package]] +name = "docker" +version = "6.1.3" +description = "A Python library for the Docker Engine API." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "docker-6.1.3-py3-none-any.whl", hash = "sha256:aecd2277b8bf8e506e484f6ab7aec39abe0038e29fa4a6d3ba86c3fe01844ed9"}, + {file = "docker-6.1.3.tar.gz", hash = "sha256:aa6d17830045ba5ef0168d5eaa34d37beeb113948c413affe1d5991fc11f9a20"}, +] + +[package.dependencies] +packaging = ">=14.0" +pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""} +requests = ">=2.26.0" +urllib3 = ">=1.26.0" +websocket-client = ">=0.32.0" + +[package.extras] +ssh = ["paramiko (>=2.4.3)"] + +[[package]] +name = "exceptiongroup" +version = "1.2.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "isodate" version = "0.6.1" @@ -128,6 +320,32 @@ files = [ {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, ] +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + [[package]] name = "pyaml" version = "23.9.3" @@ -175,6 +393,28 @@ jinja2 = "*" python-dateutil = "*" uritemplate = "*" +[[package]] +name = "pytest" +version = "7.4.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, + {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, +] + +[package.dependencies] +colorama = {version = "*", 
markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [[package]] name = "python-dateutil" version = "2.8.2" @@ -214,6 +454,29 @@ files = [ {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + [[package]] name = "pyyaml" version = "6.0.1" @@ -294,6 +557,27 @@ html = ["html5lib (>=1.0,<2.0)"] lxml = ["lxml (>=4.3.0,<5.0.0)"] networkx = ["networkx (>=2.0.0,<3.0.0)"] +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + [[package]] name = "six" version = "1.16.0" @@ -325,6 +609,50 @@ docs = ["sphinx (<5)", "sphinx-rtd-theme"] keepalive = ["keepalive (>=0.5)"] pandas = ["pandas (>=1.3.5)"] +[[package]] +name = "testcontainers" +version = "3.7.1" +description = "Library provides lightweight, throwaway instances of common databases, Selenium web browsers, or anything else that can run in a Docker container" +optional = false +python-versions = ">=3.7" +files = [ + {file = "testcontainers-3.7.1-py2.py3-none-any.whl", hash = "sha256:7f48cef4bf0ccd78f1a4534d4b701a003a3bace851f24eae58a32f9e3f0aeba0"}, +] + +[package.dependencies] +deprecation = "*" +docker = ">=4.0.0" +wrapt = "*" + +[package.extras] +arangodb = ["python-arango"] +azurite = ["azure-storage-blob"] +clickhouse = ["clickhouse-driver"] +docker-compose = ["docker-compose"] +google-cloud-pubsub = ["google-cloud-pubsub (<2)"] +kafka = ["kafka-python"] +keycloak = ["python-keycloak"] +mongo = ["pymongo"] +mssqlserver = ["pymssql"] +mysql = ["pymysql", "sqlalchemy"] +neo4j = ["neo4j"] +oracle = ["cx-Oracle", "sqlalchemy"] +postgresql = ["psycopg2-binary", "sqlalchemy"] +rabbitmq = ["pika"] +redis = ["redis"] +selenium = ["selenium"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + [[package]] name = "tzdata" version = "2023.3" @@ -364,7 +692,118 @@ files = [ {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, ] +[[package]] +name = "urllib3" +version = "2.1.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"}, + {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "websocket-client" +version = "1.6.4" +description = "WebSocket client for Python with low level API options" +optional = false +python-versions = ">=3.8" +files = [ + {file = "websocket-client-1.6.4.tar.gz", hash = "sha256:b3324019b3c28572086c4a319f91d1dcd44e6e11cd340232978c684a7650d0df"}, + {file = "websocket_client-1.6.4-py3-none-any.whl", hash = "sha256:084072e0a7f5f347ef2ac3d8698a5e0b4ffbfcab607628cadabc650fc9a83a24"}, +] + +[package.extras] +docs = ["Sphinx (>=6.0)", "sphinx-rtd-theme (>=1.1.0)"] +optional = ["python-socks", "wsaccel"] +test = ["websockets"] + +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." +optional = false +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = 
"wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = 
"sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "9941a602fa033117b70c1af850ec4604fc06ab9c24ef8c9cef23108565d6c1df" +content-hash = "7dad55ddd1bcaa7d9577b84accaa24c4c625a36770351e121ca9aa0475479baa" diff --git a/docker/lwua-ingest/lwua-py/pyproject.toml b/docker/lwua-ingest/lwua-py/pyproject.toml index 3039ff0..ef31599 100644 --- a/docker/lwua-ingest/lwua-py/pyproject.toml +++ b/docker/lwua-ingest/lwua-py/pyproject.toml @@ -14,6 +14,10 @@ sparqlwrapper = "^2.0.0" pyrdfj2 = "^0.0.5" +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.3" +testcontainers = "^3.7.1" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" From c916ec5a68cafe5b2bfdbe147df6624ec0459624 Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Wed, 22 Nov 2023 21:19:10 +0100 Subject: [PATCH 36/60] changed const variables and reverted changes on update context lastmod --- data/project.ttl | 3 + docker/graphdb/Dockerfile | 2 +- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 106 ++++++++---------- .../lwua/templates/lastmod_info.sparql | 4 +- .../templates/update_context_lastmod.sparql | 4 + docker/lwua-ingest/lwua-py/lwua/watcher.py | 4 +- .../lwua-py/{lwua => }/tests/all_tests.py | 0 .../lwua-py/{lwua => }/tests/test_graphdb.py | 0 .../lwua-py/{lwua => }/tests/test_ingest.py | 0 9 files changed, 60 insertions(+), 63 deletions(-) rename docker/lwua-ingest/lwua-py/{lwua => }/tests/all_tests.py (100%) rename docker/lwua-ingest/lwua-py/{lwua => }/tests/test_graphdb.py (100%) rename docker/lwua-ingest/lwua-py/{lwua => }/tests/test_ingest.py (100%) diff --git a/data/project.ttl b/data/project.ttl index 8ba941a..3ed9d7a 100644 --- a/data/project.ttl +++ b/data/project.ttl @@ -15,6 +15,9 @@ a schema:Person ; schema:name "Laurian van Maldeghem"^^xsd:string . + a schema:Person ; + schema:name "Alain Provist"^^xsd:string . + a schema:Person ; schema:name "Marc Portier"^^xsd:string . 
diff --git a/docker/graphdb/Dockerfile b/docker/graphdb/Dockerfile index ffa8da6..d860f87 100644 --- a/docker/graphdb/Dockerfile +++ b/docker/graphdb/Dockerfile @@ -4,4 +4,4 @@ RUN mkdir -p /root/graphdb-import/data WORKDIR /root/graphdb-import/data COPY ./initdb /initdb -RUN cd /initdb && sh ./init_graphdb.sh \ No newline at end of file +RUN cd /initdb && ./init_graphdb.sh \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 5cb8ca7..8809cef 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -11,12 +11,25 @@ import os # from dotenv import load_dotenv -from .helpers import resolve_path, singleton, data_path_from_config # ,enable_logging +from .helpers import resolve_path, data_path_from_config # ,enable_logging from pyrdfj2 import J2RDFSyntaxBuilder log = logging.getLogger(__name__) URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") +def named_context(name: str, base: str = URN_BASE): + """ + Create a named context. + + :param name: The name of the context. + :type name: str + :param base: The base of the context. Defaults to URN_BASE. + :type base: str, optional + :return: The named context. + :rtype: str + """ + return f"{base}:{name}" + def gdb_from_config(): base = os.getenv("GDB_BASE", "http://localhost:7200") repoid = os.getenv("GDB_REPO", "lwua23") @@ -27,28 +40,32 @@ def gdb_from_config(): log.debug(f"using endpoint {endpoint}") - gdb = SPARQLWrapper( + GDB = SPARQLWrapper( endpoint=endpoint, updateEndpoint=updateEndpoint, returnFormat=JSON, agent="lwua-python-sparql-client", ) - gdb.method = "POST" - return gdb + GDB.method = "POST" + return GDB -gdb = gdb_from_config() +GDB = gdb_from_config() -@singleton def get_j2rdf_builder(): template_folder = resolve_path("./lwua/templates") log.info(f"template_folder == {template_folder}") # init J2RDFSyntaxBuilder - j2rdf = J2RDFSyntaxBuilder(templates_folder=template_folder) + context = named_context("ADMIN") + j2rdf = J2RDFSyntaxBuilder( + templates_folder=template_folder, + extra_functions = {"registry_of_lastmod_context": context} + ) return j2rdf +J2RDF = get_j2rdf_builder() def update_registry_lastmod( - context: str, lastmod: str = None + context: str, lastmod: datetime ): """ Update the administration of a context. 
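The get_j2rdf_builder() change above is the heart of the "const variables" rework: the ADMIN registry context is now injected once, at builder construction, through extra_functions, so templates can reference it without every caller passing it along. A standalone sketch of that usage, assuming only the pyrdfj2 API and the template folder already used in this diff:

    from pyrdfj2 import J2RDFSyntaxBuilder

    # assumes ./templates/update_context_lastmod.sparql exists,
    # as created earlier in this series
    J2RDF = J2RDFSyntaxBuilder(
        templates_folder="./templates",
        extra_functions={"registry_of_lastmod_context": "urn:lwua:INGEST:ADMIN"},
    )

    # render the update that stamps one graph with its schema:dateModified
    query = J2RDF.build_syntax(
        "update_context_lastmod.sparql",
        context="urn:lwua:INGEST:project.ttl",
        lastmod="2023-09-10T13:43:05",
    )
    print(query)
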
@@ -59,7 +76,7 @@ def update_registry_lastmod( :type lastmod: str """ log.info(f"update registry_of_lastmod_context on {context}") - j2rdf = get_j2rdf_builder() + # check if context is IRI compliant assert_iri_compliance(context) @@ -67,20 +84,19 @@ def update_registry_lastmod( # variables for the template template = "update_context_lastmod.sparql" vars = { - "registry_of_lastmod_context": registry_of_lastmod_context(), "context": context, - "lastmod": lastmod, + "lastmod": lastmod.isoformat() if lastmod is not None else None } # get the sparql query - query = j2rdf.build_syntax(template, **vars) + query = J2RDF.build_syntax(template, **vars) # log.debug(f"update_registry_lastmod query == {query}") # execute the query - gdb.setQuery(query) - gdb.query() + GDB.setQuery(query) + GDB.query() def assert_iri_compliance(context: str): - assert URN_BASE in context, f"Context {context} is not IRI compliant" + assert context.startswith(URN_BASE), f"Context {context} is not IRI compliant" def insert_graph(graph: Graph, context: str = None): @@ -96,7 +112,7 @@ def insert_graph(graph: Graph, context: str = None): log.info(f"insert_graph into {context}") assert_context_exists(context) # Initialize the J2RDFSyntaxBuilder - j2rdf = get_j2rdf_builder() + assert_iri_compliance(context) if context is not None else None @@ -106,12 +122,12 @@ def insert_graph(graph: Graph, context: str = None): vars = {"context": context, "raw_triples": ntstr} # Get the SPARQL query - query = j2rdf.build_syntax(template, **vars) + query = J2RDF.build_syntax(template, **vars) # log.debug(f"insert_graph query == {query}") # Execute the query - gdb.setQuery(query) - gdb.query() + GDB.setQuery(query) + GDB.query() def assert_context_exists(context: str): @@ -130,7 +146,7 @@ def delete_graph(context: str): assert_context_exists(context) # Initialize the J2RDFSyntaxBuilder - j2rdf = get_j2rdf_builder() + # check if context is IRI compliant assert_iri_compliance(context) if context is not None else None @@ -140,14 +156,14 @@ def delete_graph(context: str): vars = {"context": context} # Get the SPARQL query - query = j2rdf.build_syntax(template, **vars) + query = J2RDF.build_syntax(template, **vars) # Execute the query - gdb.setQuery(query) - gdb.query() + GDB.setQuery(query) + GDB.query() -def ingest_graph(graph: Graph, lastmod, context: str, replace: bool = False): +def ingest_graph(graph: Graph, lastmod:datetime, context: str, replace: bool = False): """ Convert a filename to a context. @@ -167,22 +183,10 @@ def ingest_graph(graph: Graph, lastmod, context: str, replace: bool = False): insert_graph(graph, context) # convert the epoch timestamp to a date string - date_string = datetime.utcfromtimestamp(lastmod).isoformat() - update_registry_lastmod(context, date_string) + update_registry_lastmod(context, lastmod) -def named_context(name: str, base: str = URN_BASE): - """ - Create a named context. - :param name: The name of the context. - :type name: str - :param base: The base of the context. Defaults to URN_BASE. - :type base: str, optional - :return: The named context. - :rtype: str - """ - return f"{base}:{name}" # TODO maybe consider something else? def context_2_fname(context: str): """ @@ -207,33 +211,19 @@ def fname_2_context(fname: str): """ return named_context(fname) -def date_2_epoch(date: str): - """ - Convert a date string to an epoch timestamp. - - :param date: The date string to convert. - :type date: str - :return: The epoch timestamp corresponding to the date string. 
- :rtype: float - """ - return datetime.fromisoformat(date).timestamp() - -def registry_of_lastmod_context(): - return named_context("ADMIN") def get_registry_of_lastmod(): log.info(f"getting last modified graph") - j2rdf = get_j2rdf_builder() template = "lastmod_info.sparql" - vars = {"context": registry_of_lastmod_context()} - query = j2rdf.build_syntax(template, **vars) + vars = {} + query = J2RDF.build_syntax(template, **vars) # log.debug(f"get_admin_graph query == {query}") - gdb.setQuery(query) - gdb.setReturnFormat(JSON) - results = gdb.query().convert() + GDB.setQuery(query) + GDB.setReturnFormat(JSON) + results = GDB.query().convert() # convert {'head': {'vars': ['graph', 'lastmod']}, 'results': {'bindings': []}} to [{PosixPath('graph'): lastmod}] # URI must be substracted from graph context and datetime str must be converted to epoch @@ -241,7 +231,7 @@ def get_registry_of_lastmod(): converted = {} for g in results["results"]["bindings"]: path = context_2_fname(g["graph"]["value"]) - time = date_2_epoch(g["lastmod"]["value"]) + time = datetime.fromisoformat(g["lastmod"]["value"]) converted[path] = time return converted @@ -269,7 +259,7 @@ def delete_data_file(fname): update_registry_lastmod(context, None) -def ingest_data_file(fname, lastmod, replace: bool = True): +def ingest_data_file(fname: str, lastmod: datetime, replace: bool = True): """ Ingest a data file. diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql index cd01ea6..62464ed 100644 --- a/docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql +++ b/docker/lwua-ingest/lwua-py/lwua/templates/lastmod_info.sparql @@ -1,9 +1,9 @@ {# This template is used to generate SPARQL SELECT queries. This template takes 1 parameter: - - context: the context from which the data is to be selected + - registry_of_lastmod_context: the context from which the data is to be selected #} SELECT ?graph ?lastmod WHERE { - GRAPH <{{ context }}> { ?graph ?lastmod } + GRAPH <{{ registry_of_lastmod_context }}> { ?graph ?lastmod } } \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql index ec8f3a6..ea63399 100644 --- a/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql +++ b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql @@ -17,6 +17,10 @@ DELETE { INSERT { {% if lastmod %} GRAPH <{{ registry_of_lastmod_context }}> { + {# + pyrdf2j here is a hack to convert a Python datetime object to an XSD dateTime literal. + <{{ context }}> schema:dateModified "{{ lastmod | xsd('dateTime') }} . + #} <{{ context }}> schema:dateModified "{{ lastmod }}"^^xsd:dateTime . 
} {% endif %} diff --git a/docker/lwua-ingest/lwua-py/lwua/watcher.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py index 5e92caa..fc51ef9 100644 --- a/docker/lwua-ingest/lwua-py/lwua/watcher.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -1,5 +1,5 @@ import time -import datetime +from datetime import datetime import os from pathlib import Path from dotenv import load_dotenv @@ -36,7 +36,7 @@ def __init__(self, folder_to_inspect): log.info(f"Watching {self.root}") def report_changes(self, known_lastmod_by_fname: dict = {}, observer: FolderChangeObserver = None): - current_lastmod_by_fname = {p: os.path.getmtime(p) for p in self.root.glob('**/*') if p.is_file()} + current_lastmod_by_fname = {p: datetime.utcfromtimestamp(os.path.getmtime(p)) for p in self.root.glob('**/*') if p.is_file()} log.info(f"current_lastmod_by_fname: {current_lastmod_by_fname}") for fname in known_lastmod_by_fname: if fname not in current_lastmod_by_fname: diff --git a/docker/lwua-ingest/lwua-py/lwua/tests/all_tests.py b/docker/lwua-ingest/lwua-py/tests/all_tests.py similarity index 100% rename from docker/lwua-ingest/lwua-py/lwua/tests/all_tests.py rename to docker/lwua-ingest/lwua-py/tests/all_tests.py diff --git a/docker/lwua-ingest/lwua-py/lwua/tests/test_graphdb.py b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py similarity index 100% rename from docker/lwua-ingest/lwua-py/lwua/tests/test_graphdb.py rename to docker/lwua-ingest/lwua-py/tests/test_graphdb.py diff --git a/docker/lwua-ingest/lwua-py/lwua/tests/test_ingest.py b/docker/lwua-ingest/lwua-py/tests/test_ingest.py similarity index 100% rename from docker/lwua-ingest/lwua-py/lwua/tests/test_ingest.py rename to docker/lwua-ingest/lwua-py/tests/test_ingest.py From 5f671428c8371346fd0ac0e647f705e8f776cb0f Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Mon, 27 Nov 2023 14:34:37 +0100 Subject: [PATCH 37/60] done refactoring + tests made + workflows for autopep8 and black made --- .github/workflows/linting.yml | 34 +++++++ .github/workflows/lwua-ingest-testing.yml | 33 +++++++ data/project.ttl | 2 +- docker/docker-compose.yml | 2 + docker/lwua-ingest/lwua-py/lwua/graphdb.py | 66 ++----------- docker/lwua-ingest/lwua-py/lwua/helpers.py | 5 - docker/lwua-ingest/lwua-py/lwua/ingest.py | 98 ++++++++++++++---- docker/lwua-ingest/lwua-py/lwua/schedule.py | 12 ++- .../lwua/templates/delete_graph.sparql | 2 +- .../templates/update_context_lastmod.sparql | 3 +- docker/lwua-ingest/lwua-py/lwua/watcher.py | 30 +----- docker/lwua-ingest/lwua-py/tests/all_tests.py | 28 ------ .../lwua-ingest/lwua-py/tests/test_graphdb.py | 90 +++++++++-------- .../lwua-ingest/lwua-py/tests/test_ingest.py | 38 +++---- .../lwua-ingest/lwua-py/tests/test_queries.py | 99 +++++++++++++++++++ dotenv-example | 4 + 16 files changed, 336 insertions(+), 210 deletions(-) create mode 100644 .github/workflows/linting.yml create mode 100644 .github/workflows/lwua-ingest-testing.yml delete mode 100644 docker/lwua-ingest/lwua-py/tests/all_tests.py create mode 100644 docker/lwua-ingest/lwua-py/tests/test_queries.py diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..3307221 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,34 @@ +name: Python Linting + +on: + push: + paths: + - 'docker/lwua-ingest/**/*.py' + pull_request: + paths: + - 'docker/lwua-ingest/**/*.py' + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Check out source repository + uses: actions/checkout@v2 + + - 
name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.10 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black autopep8 + + - name: Run Black + run: | + black --check --line-length 79 docker/lwua-ingest/ + + - name: Run autopep8 + run: | + autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/lwua-ingest/ \ No newline at end of file diff --git a/.github/workflows/lwua-ingest-testing.yml b/.github/workflows/lwua-ingest-testing.yml new file mode 100644 index 0000000..9c73f8a --- /dev/null +++ b/.github/workflows/lwua-ingest-testing.yml @@ -0,0 +1,33 @@ +name: Python Tests + +on: + push: + paths: + - 'docker/lwua-ingest/lwua-py/**/*.py' + pull_request: + paths: + - 'docker/lwua-ingest/lwua-py/**/*.py' + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Check out source repository + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.10 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install poetry + cd docker/lwua-ingest/lwua-py + poetry install + + - name: Run pytest + run: | + cd docker/lwua-ingest/lwua-py + poetry run pytest ./tests/ \ No newline at end of file diff --git a/data/project.ttl b/data/project.ttl index 3ed9d7a..2176f68 100644 --- a/data/project.ttl +++ b/data/project.ttl @@ -16,7 +16,7 @@ schema:name "Laurian van Maldeghem"^^xsd:string . a schema:Person ; - schema:name "Alain Provist"^^xsd:string . + schema:name "Alain Proviste"^^xsd:string . a schema:Person ; schema:name "Marc Portier"^^xsd:string . diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 646d386..8b3da2a 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -40,6 +40,8 @@ services: #args: image: lwua/lwua_ingest container_name: lwua_ingest + depends_on: + - graphdb volumes: - ../data:/data # Store for any input data - ../logging:/logging # Store for any input data diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 8809cef..5d49476 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -11,24 +11,12 @@ import os # from dotenv import load_dotenv -from .helpers import resolve_path, data_path_from_config # ,enable_logging +from .helpers import resolve_path # ,enable_logging from pyrdfj2 import J2RDFSyntaxBuilder log = logging.getLogger(__name__) URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") -def named_context(name: str, base: str = URN_BASE): - """ - Create a named context. - - :param name: The name of the context. - :type name: str - :param base: The base of the context. Defaults to URN_BASE. - :type base: str, optional - :return: The named context. - :rtype: str - """ - return f"{base}:{name}" def gdb_from_config(): base = os.getenv("GDB_BASE", "http://localhost:7200") @@ -55,7 +43,7 @@ def get_j2rdf_builder(): template_folder = resolve_path("./lwua/templates") log.info(f"template_folder == {template_folder}") # init J2RDFSyntaxBuilder - context = named_context("ADMIN") + context = f"{URN_BASE}:ADMIN" j2rdf = J2RDFSyntaxBuilder( templates_folder=template_folder, extra_functions = {"registry_of_lastmod_context": context} @@ -165,7 +153,7 @@ def delete_graph(context: str): def ingest_graph(graph: Graph, lastmod:datetime, context: str, replace: bool = False): """ - Convert a filename to a context. + Ingest a graph into a context. 
:param fname: The filename to convert. :type fname: str @@ -186,8 +174,6 @@ def ingest_graph(graph: Graph, lastmod:datetime, context: str, replace: bool = update_registry_lastmod(context, lastmod) - - def context_2_fname(context: str): """ Convert a context to a filename path. @@ -200,20 +186,6 @@ def context_2_fname(context: str): return Path(context.replace(f"{URN_BASE}:", "")) -def fname_2_context(fname: str): - """ - Convert a filename to a context. - - :param fname: The filename to convert. - :type fname: str - :return: The context corresponding to the filename. - :rtype: str - """ - return named_context(fname) - - - - def get_registry_of_lastmod(): log.info(f"getting last modified graph") @@ -228,6 +200,10 @@ def get_registry_of_lastmod(): # convert {'head': {'vars': ['graph', 'lastmod']}, 'results': {'bindings': []}} to [{PosixPath('graph'): lastmod}] # URI must be substracted from graph context and datetime str must be converted to epoch + converted = {} + return convert_results_registry_of_lastmod(results) + +def convert_results_registry_of_lastmod(results): converted = {} for g in results["results"]["bindings"]: path = context_2_fname(g["graph"]["value"]) @@ -249,31 +225,3 @@ def read_graph(fpath: Path, format: str = None): format = format or suffix_2_format(fpath.suffix) graph: Graph = Graph().parse(location=str(fpath), format=format) return graph - - -def delete_data_file(fname): - context = fname_2_context(fname) - log.info(f"deleting {fname} from {context}") - assert_context_exists(context) - delete_graph(context) - update_registry_lastmod(context, None) - - -def ingest_data_file(fname: str, lastmod: datetime, replace: bool = True): - """ - Ingest a data file. - - :param fname: The name of the file to ingest. - :type fname: str - :param replace: Whether to replace the existing data. Defaults to False. - :type replace: bool - :raises AssertionError: If the file does not exist. 
- """ - file_path = data_path_from_config() / fname - assert file_path.exists(), f"cannot ingest file at {file_path}" - graph = read_graph(file_path) - context = fname_2_context(fname) - log.info(f"ingesting {file_path} into {context} | replace : {replace}") - ingest_graph(graph, lastmod, context=context, replace=replace) - # TODO maintain metadata triples last-ingest / last-modified of ingested - # file in some admin graph context diff --git a/docker/lwua-ingest/lwua-py/lwua/helpers.py b/docker/lwua-ingest/lwua-py/lwua/helpers.py index 9442438..d4d0908 100644 --- a/docker/lwua-ingest/lwua-py/lwua/helpers.py +++ b/docker/lwua-ingest/lwua-py/lwua/helpers.py @@ -78,8 +78,3 @@ def resolve_path(location: str, versus: str = "module"): log.debug(f"resolve path base='{base}' + rel='{location}'") return Path(base, location).absolute() - -def data_path_from_config(): - local_default = str(resolve_path("./data", versus="dotenv")) - folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) - return Path(folder_name).absolute() \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index d429bf8..816de15 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -1,15 +1,65 @@ import logging import time from dotenv import load_dotenv +from datetime import datetime from SPARQLWrapper import SPARQLWrapper, JSON +import os from pathlib import Path -from .helpers import enable_logging, data_path_from_config -from .watcher import FolderChangeDetector, IngestChangeObserver -from .graphdb import ingest_data_file, get_registry_of_lastmod +from .helpers import enable_logging, resolve_path +from .watcher import FolderChangeObserver, FolderChangeDetector +from .graphdb import ( + get_registry_of_lastmod, + delete_graph, + ingest_graph, + update_registry_lastmod, + read_graph +) log = logging.getLogger(__name__) +# functions here to ingest and delete files +def fname_2_context(fname: str): + """ + Convert a filename to a context. + + :param fname: The filename to convert. + :type fname: str + :return: The context corresponding to the filename. + :rtype: str + """ + base = os.getenv("URN_BASE", "urn:lwua:INGEST") + return f"{base}:{fname}" + +def delete_data_file(fname): + context = fname_2_context(fname) + log.info(f"deleting {fname} from {context}") + delete_graph(context) + update_registry_lastmod(context, None) + +def ingest_data_file(fname: str, lastmod: datetime, replace: bool = True): + """ + Ingest a data file. + + :param fname: The name of the file to ingest. + :type fname: str + :param replace: Whether to replace the existing data. Defaults to True. + :type replace: bool + :raises AssertionError: If the file does not exist. 
+ """ + file_path = data_path_from_config() / fname + assert file_path.exists(), f"cannot ingest file at {file_path}" + graph = read_graph(file_path) + context = fname_2_context(fname) + log.info(f"ingesting {file_path} into {context} | replace : {replace}") + ingest_graph(graph, lastmod, context=context, replace=replace) + + +def data_path_from_config(): + local_default = str(resolve_path("./data", versus="dotenv")) + folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) + return Path(folder_name).absolute() + def run_ingest(): data_path = data_path_from_config() log.info(f"run_ingest on updated files in {data_path}") @@ -18,21 +68,36 @@ def run_ingest(): # run while true loop with 5 second sleep detector = FolderChangeDetector(data_path) ingestor = IngestChangeObserver() - last_mod = None - while last_mod is None: - try: - last_mod = get_registry_of_lastmod() - log.info(f"initial last mod == {last_mod}") - except Exception as e: - log.exception(e) - time.sleep(2) + last_mod = {} + try: + last_mod = get_registry_of_lastmod() + log.info(f"initial last mod == {last_mod}") + except Exception as e: + log.exception(e) + + log.info("reporting changes") + last_mod = detector.report_changes(ingestor,last_mod) + log.info(f"last_mod == {last_mod}") - while True: - log.info("reporting changes") - last_mod = detector.report_changes(last_mod,ingestor) - log.info(f"last_mod == {last_mod}") - time.sleep(5) +class IngestChangeObserver(FolderChangeObserver): + def __init__(self): + pass + + def removed(self, fname): + # Implement the deletion of graph context and update of lastmod registry + log.info(f"File {fname} has been deleted") + delete_data_file(fname) + def added(self, fname, lastmod): + # Implement the addition of graph in context + log.info(f"File {fname} has been added") + ingest_data_file(fname, lastmod) + + def changed(self, fname, lastmod): + # Implement the replacement of graph in context and update the lastmod registry + log.info(f"File {fname} has been modified") + ingest_data_file(fname,lastmod, True) + # Note: this main method allows to locally test outside docker # directly connecting to a localhost graphdb endpoint (which might be @@ -42,7 +107,6 @@ def run_ingest(): def main(): load_dotenv() enable_logging() - ingest_data_file("project.ttl") if __name__ == "__main__": diff --git a/docker/lwua-ingest/lwua-py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py index 532d8a8..fcf68ee 100644 --- a/docker/lwua-ingest/lwua-py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -2,6 +2,7 @@ """ import logging import time +import os from apscheduler.schedulers.blocking import BlockingScheduler from .ingest import run_ingest @@ -12,15 +13,18 @@ def main_schedule(): log.info("starting main service flow") run_ingest() - time.sleep(1) # https://apscheduler.readthedocs.io/en/3.x/userguide.html class LWUAScheduler(BlockingScheduler): def __init__(self, run_on_start: bool = True): - # todo consider injecting interval through .env - timeprops: dict = dict(minutes=30) - # timeprops: dict = dict(seconds=5) + time_delta = os.getenv("SCHEDULER_DELTA", "30") + timeprops: dict = dict(seconds=int(time_delta)) + + # get the waittime before starting the scheduler + waittime = os.getenv("SCHEDULER_WAIT", "0") + time.sleep(int(waittime)) + super().__init__() self._run_on_start = run_on_start self.add_job(lambda: main_schedule(), "interval", **timeprops) diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/delete_graph.sparql 
b/docker/lwua-ingest/lwua-py/lwua/templates/delete_graph.sparql index d487b9f..5f72022 100644 --- a/docker/lwua-ingest/lwua-py/lwua/templates/delete_graph.sparql +++ b/docker/lwua-ingest/lwua-py/lwua/templates/delete_graph.sparql @@ -5,6 +5,6 @@ #} {% if context %} DELETE WHERE { - GRAPH <{{ context }}> { ?s ?p ?o } + GRAPH <{{ context }}> { ?s ?p ?o } } {% endif %} \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql index ea63399..143f4dc 100644 --- a/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql +++ b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql @@ -19,7 +19,8 @@ INSERT { GRAPH <{{ registry_of_lastmod_context }}> { {# pyrdf2j here is a hack to convert a Python datetime object to an XSD dateTime literal. - <{{ context }}> schema:dateModified "{{ lastmod | xsd('dateTime') }} . + <{{ context }}> schema:dateModified "{{ lastmod | xsd('xsd:datetime') }} . + #} <{{ context }}> schema:dateModified "{{ lastmod }}"^^xsd:dateTime . } diff --git a/docker/lwua-ingest/lwua-py/lwua/watcher.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py index fc51ef9..5ac78d1 100644 --- a/docker/lwua-ingest/lwua-py/lwua/watcher.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -6,11 +6,6 @@ from abc import ABC, abstractmethod import logging from lwua.helpers import enable_logging, resolve_path -from lwua.graphdb import ( - ingest_data_file, - delete_data_file, - get_registry_of_lastmod, -) log = logging.getLogger(__name__) @@ -35,7 +30,7 @@ def __init__(self, folder_to_inspect): time.sleep(1) log.info(f"Watching {self.root}") - def report_changes(self, known_lastmod_by_fname: dict = {}, observer: FolderChangeObserver = None): + def report_changes(self, observer,known_lastmod_by_fname: dict = {}): current_lastmod_by_fname = {p: datetime.utcfromtimestamp(os.path.getmtime(p)) for p in self.root.glob('**/*') if p.is_file()} log.info(f"current_lastmod_by_fname: {current_lastmod_by_fname}") for fname in known_lastmod_by_fname: @@ -49,28 +44,7 @@ def report_changes(self, known_lastmod_by_fname: dict = {}, observer: FolderChan observer.changed(fname, lastmod) return current_lastmod_by_fname - -class IngestChangeObserver(FolderChangeObserver): - def __init__(self): - pass - - def removed(self, fname): - # Implement the deletion of graph context and update of lastmod registry - log.info(f"File {fname} has been deleted") - delete_data_file(fname) - - def added(self, fname, lastmod): - # Implement the addition of graph in context - log.info(f"File {fname} has been added") - ingest_data_file(fname, lastmod) - - def changed(self, fname, lastmod): - # Implement the replacement of graph in context and update the lastmod registry - log.info(f"File {fname} has been modified") - ingest_data_file(fname,lastmod, True) - - - + # test the watcher on local file system - not in docker if __name__ == "__main__": diff --git a/docker/lwua-ingest/lwua-py/tests/all_tests.py b/docker/lwua-ingest/lwua-py/tests/all_tests.py deleted file mode 100644 index d17f806..0000000 --- a/docker/lwua-ingest/lwua-py/tests/all_tests.py +++ /dev/null @@ -1,28 +0,0 @@ -import unittest -import os -from testcontainers.compose import DockerCompose - -class DockerTestSuite(unittest.TestSuite): - def setUp(self): - self.docker_compose = DockerCompose("../../../../", "docker-compose.yml") - self.docker_compose.start() - - def tearDown(self): - self.docker_compose.stop() - - def 
run(self, result=None):
-        self.setUp()
-        super(DockerTestSuite, self).run(result)
-        self.tearDown()
-
-if __name__ == "__main__":
-    # Start test suite
-    loader = unittest.TestLoader()
-    current_dir_path = os.path.dirname(os.path.realpath(__file__))
-    non_docker_suite = loader.discover(current_dir_path, pattern='test_*.py')
-    suite = DockerTestSuite(loader.discover(current_dir_path, pattern='docker_test_*.py'))
-
-    # Run the tests
-    runner = unittest.TextTestRunner()
-    runner.run(suite)
-    runner.run(non_docker_suite)
\ No newline at end of file
diff --git a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py
index 4bdd437..1a747e2 100644
--- a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py
+++ b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py
@@ -1,44 +1,48 @@
-import unittest
-from unittest.mock import patch, MagicMock
+import pytest
+from datetime import datetime
 from pathlib import Path
-import lwua.graphdb as graphdb
-
-class TestGraphDB(unittest.TestCase):
-    @patch('graphdb.suffix_2_format')
-    @patch('graphdb.Graph')
-    def test_read_graph(self, mock_graph, mock_suffix_2_format):
-        # Arrange
-        mock_suffix_2_format.return_value = 'xml'
-        mock_graph_instance = mock_graph.return_value
-        mock_graph_instance.parse.return_value = None
-
-        # Act
-        result = graphdb.read_graph(Path('test.xml'))
-
-        # Assert
-        mock_suffix_2_format.assert_called_once_with('.xml')
-        mock_graph.assert_called_once()
-        mock_graph_instance.parse.assert_called_once_with(location='test.xml', format='xml')
-        self.assertIsNone(result)
-
-    @patch('graphdb.fname_2_context')
-    @patch('graphdb.log.info')
-    @patch('graphdb.assert_context_exists')
-    @patch('graphdb.delete_graph')
-    @patch('graphdb.update_registry_lastmod')
-    def test_delete_data_file(self, mock_update_registry_lastmod, mock_delete_graph, mock_assert_context_exists, mock_log_info, mock_fname_2_context):
-        # Arrange
-        mock_fname_2_context.return_value = 'context'
-
-        # Act
-        graphdb.delete_data_file('test.xml')
-
-        # Assert
-        mock_fname_2_context.assert_called_once_with('test.xml')
-        mock_log_info.assert_called_once()
-        mock_assert_context_exists.assert_called_once_with('context')
-        mock_delete_graph.assert_called_once_with('context')
-        mock_update_registry_lastmod.assert_called_once_with('context', None)
-
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+from lwua.graphdb import context_2_fname, suffix_2_format, read_graph, convert_results_registry_of_lastmod
+from lwua.ingest import data_path_from_config
+
+results = {
+    "head": {"vars": ["graph", "lastmod"]},
+    "results": {"bindings": [{"graph": {"value": "urn:lwua:INGEST:test_file.txt"}, "lastmod": {"value": "2022-01-01T00:00:00"}}]}
+    }
+
+def test_context_2_fname():
+    # Act
+    converted = context_2_fname("urn:lwua:INGEST:test_file.txt")
+
+    # Assert
+    assert isinstance(converted, Path)
+
+def test_convert_results_registry_of_lastmod():
+    # Act
+    converted = convert_results_registry_of_lastmod(results)
+
+    # Assert
+    assert isinstance(converted, dict)
+    assert len(converted) == 1
+    assert converted[Path("test_file.txt")] == datetime.fromisoformat("2022-01-01T00:00:00")
+
+def test_suffix_2_format():
+    # Arrange
+    suffixes = ["ttl", "turtle", "jsonld", "json", "other"]
+
+    # Act
+    results = [suffix_2_format(suffix) for suffix in suffixes]
+
+    # Assert
+    assert results == ["turtle", "turtle", "json-ld", "json-ld", None]
+
+def test_read_graph():
+    # Arrange
+    fpath = data_path_from_config() 
/ "project.ttl" # replace with a test file path + format = "turtle" + + # Act + graph = read_graph(fpath, format) + + # Assert + assert graph is not None + # Add more assertions based on what the read_graph function is supposed to do \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/tests/test_ingest.py b/docker/lwua-ingest/lwua-py/tests/test_ingest.py index 206d7bb..e2ea99a 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_ingest.py +++ b/docker/lwua-ingest/lwua-py/tests/test_ingest.py @@ -1,26 +1,18 @@ -import unittest -from unittest.mock import patch, MagicMock -from lwua.ingest import run_ingest +# test file for the ingest.py file in the lwua-py folder +import pytest +import os +from lwua.ingest import fname_2_context, data_path_from_config -class TestIngest(unittest.TestCase): - @patch('ingest.get_registry_of_lastmod') - @patch('ingest.FolderChangeDetector') - @patch('ingest.IngestChangeObserver') - def test_run_ingest(self, mock_observer, mock_detector, mock_lastmod): - # Arrange - mock_lastmod.return_value = None - mock_detector_instance = mock_detector.return_value - mock_detector_instance.report_changes.return_value = None - mock_observer_instance = mock_observer.return_value +def test_fname_2_context(): + # Arrange + fname = "test_file.txt" # replace with a test file name - # Act - run_ingest() + # Act + # Call the function with the test parameters + result = fname_2_context(fname) - # Assert - mock_lastmod.assert_called_once() - mock_detector.assert_called_once() - mock_observer.assert_called_once() - mock_detector_instance.report_changes.assert_called() - -if __name__ == '__main__': - unittest.main() \ No newline at end of file + # Assert + # Check the results + # This depends on what the function does + # For example, if the function returns a string based on the file name, you can check if the string is correct + assert result == "urn:lwua:INGEST:test_file.txt", f"Expected 'expected_result', but got '{result}'" diff --git a/docker/lwua-ingest/lwua-py/tests/test_queries.py b/docker/lwua-ingest/lwua-py/tests/test_queries.py new file mode 100644 index 0000000..bc16173 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/tests/test_queries.py @@ -0,0 +1,99 @@ +# test file to test the JRDF builder and all the templates + +from pyrdfj2 import J2RDFSyntaxBuilder +from lwua.helpers import resolve_path +import os + +URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") + +def get_j2rdf_builder(): + template_folder = os.path.join(os.path.dirname(__file__), "../lwua/templates") + # init J2RDFSyntaxBuilder + context = f"{URN_BASE}:ADMIN" + j2rdf = J2RDFSyntaxBuilder( + templates_folder=template_folder, + extra_functions = {"registry_of_lastmod_context": context} + ) + return j2rdf + +J2RDF = get_j2rdf_builder() + +def test_template_insert_graph(): + # Arrange + template = "insert_graph.sparql" + vars = { + "context": "urn:lwua:INGEST:test_file.txt", + "raw_triples": " .", + } + + # Act + query = J2RDF.build_syntax(template, **vars) + + #clean up the query by removing the newlines + query = query.replace("\n", "") + + print(query) + to_expect = "INSERT DATA { GRAPH { . 
} }" + # Assert + assert query == to_expect, f"Expected '{to_expect}', but got '{query}'" + +def test_template_delete_graph(): + # Arrange + template = "delete_graph.sparql" + vars = { + "context": "urn:lwua:INGEST:test_file.txt", + } + + # Act + query = J2RDF.build_syntax(template, **vars) + + #clean up the query by removing the newlines + query = query.replace("\n", "") + + print(query) + to_expect = "DELETE WHERE { GRAPH { ?s ?p ?o }}" + # Assert + assert query == to_expect, f"Expected '{to_expect}', but got '{query}'" + +def test_template_update_context_lastmod(): + # Arrange + template = "update_context_lastmod.sparql" + vars = { + "context": "urn:lwua:INGEST:test_file.txt", + "lastmod": "2022-01-01T00:00:00", + } + + # Act + query = J2RDF.build_syntax(template, **vars) + + #clean up the query by removing the newlines + query = query.replace("\n", "") + #replace all spaces with nothing + query = query.replace(" ", "") + + to_expect = 'PREFIX schema: DELETE { GRAPH { schema:dateModified ?date . }}INSERT { GRAPH { schema:dateModified "2022-01-01T00:00:00"^^xsd:dateTime . } }WHERE { OPTIONAL { GRAPH { schema:dateModified ?date . } }}' + # replace all spaces with nothing + to_expect = to_expect.replace(" ", "") + # Assert + assert query == to_expect, f"Expected '{to_expect}', but got '{query}'" + +def test_template_lastmod_info(): + # Arrange + template = "lastmod_info.sparql" + vars = { + "context": "urn:lwua:INGEST:test_file.txt", + } + + # Act + query = J2RDF.build_syntax(template, **vars) + + #clean up the query by removing the newlines + query = query.replace("\n", "") + #replace all spaces with nothing + query = query.replace(" ", "") + + to_expect = 'SELECT ?graph ?lastmod WHERE { GRAPH { ?graph ?lastmod }}' + # replace all spaces with nothing + to_expect = to_expect.replace(" ", "") + # Assert + assert query == to_expect, f"Expected '{to_expect}', but got '{query}'" \ No newline at end of file diff --git a/dotenv-example b/dotenv-example index cd0c225..a19cf77 100644 --- a/dotenv-example +++ b/dotenv-example @@ -3,3 +3,7 @@ LOGCONF=debug-logconf.yml # general repo setting GDB_REPO="lwua23" + +# scheduler settings +SCHEDULER_DELTA=5 # seconds +SCHEDULER_WAIT=5 # seconds | this is needed because the graphdb container takes some time to start, else the first run will be an error From f486db3a111bebc065dfcf97d0bcee92aee74138 Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Mon, 27 Nov 2023 14:37:53 +0100 Subject: [PATCH 38/60] changed version for workflows --- .github/workflows/linting.yml | 2 +- .github/workflows/lwua-ingest-testing.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 3307221..24c67f0 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.10 + python-version: 3.10.0 - name: Install dependencies run: | diff --git a/.github/workflows/lwua-ingest-testing.yml b/.github/workflows/lwua-ingest-testing.yml index 9c73f8a..b1019b4 100644 --- a/.github/workflows/lwua-ingest-testing.yml +++ b/.github/workflows/lwua-ingest-testing.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.10 + python-version: 3.10.0 - name: Install dependencies run: | From 103b51c768dc346149f4eff93d9190af9a37bf56 Mon Sep 17 00:00:00 2001 From: cedricdcc 
<30471340+cedricdcc@users.noreply.github.com> Date: Mon, 27 Nov 2023 14:41:00 +0100 Subject: [PATCH 39/60] renaming workflow file + change in python test file to check if action retriggers --- .github/workflows/{linting.yml => linting-python-files.yml} | 0 docker/lwua-ingest/lwua-py/lwua/graphdb.py | 1 - 2 files changed, 1 deletion(-) rename .github/workflows/{linting.yml => linting-python-files.yml} (100%) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting-python-files.yml similarity index 100% rename from .github/workflows/linting.yml rename to .github/workflows/linting-python-files.yml diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 5d49476..9a0d802 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -65,7 +65,6 @@ def update_registry_lastmod( """ log.info(f"update registry_of_lastmod_context on {context}") - # check if context is IRI compliant assert_iri_compliance(context) From 3eed9b73b84b8212fef163f0b12f507a97967b1c Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Mon, 27 Nov 2023 14:43:52 +0100 Subject: [PATCH 40/60] changed python workflow versions to work with arch x64 --- .github/workflows/linting-python-files.yml | 2 +- .github/workflows/lwua-ingest-testing.yml | 2 +- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linting-python-files.yml b/.github/workflows/linting-python-files.yml index 24c67f0..747a1bf 100644 --- a/.github/workflows/linting-python-files.yml +++ b/.github/workflows/linting-python-files.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.10.0 + python-version: 3.10.6 - name: Install dependencies run: | diff --git a/.github/workflows/lwua-ingest-testing.yml b/.github/workflows/lwua-ingest-testing.yml index b1019b4..84489f5 100644 --- a/.github/workflows/lwua-ingest-testing.yml +++ b/.github/workflows/lwua-ingest-testing.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.10.0 + python-version: 3.10.6 - name: Install dependencies run: | diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 9a0d802..40c01c9 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -65,6 +65,7 @@ def update_registry_lastmod( """ log.info(f"update registry_of_lastmod_context on {context}") + # check if context is IRI compliant assert_iri_compliance(context) From a2c6cf8be83dd61b7ff43adcb7dd215dcc30afdc Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Mon, 27 Nov 2023 14:51:53 +0100 Subject: [PATCH 41/60] attempt 4 at working linting --- .github/workflows/linting-python-files.yml | 12 ++++++++++-- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linting-python-files.yml b/.github/workflows/linting-python-files.yml index 747a1bf..ad41f44 100644 --- a/.github/workflows/linting-python-files.yml +++ b/.github/workflows/linting-python-files.yml @@ -27,8 +27,16 @@ jobs: - name: Run Black run: | - black --check --line-length 79 docker/lwua-ingest/ + black docker/lwua-ingest/ - name: Run autopep8 run: | - autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/lwua-ingest/ \ 
No newline at end of file + autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/lwua-ingest/ + + - name: Commit and push changes + run: | + git config --global user.name 'cedricdcc' + git config --global user.email 'github-actions[bot]@users.noreply.github.com' + git add -A + git commit -m "Automated code formatting" || exit 0 + git push \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 40c01c9..25a03ba 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -66,6 +66,7 @@ def update_registry_lastmod( log.info(f"update registry_of_lastmod_context on {context}") + # check if context is IRI compliant assert_iri_compliance(context) From c1af6c735a999dcc775ca1c6ca8741b755426cfd Mon Sep 17 00:00:00 2001 From: cedricdcc Date: Mon, 27 Nov 2023 13:52:25 +0000 Subject: [PATCH 42/60] Automated code formatting --- docker/lwua-ingest/lwua-py/lwua/daemon.py | 7 +-- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 43 ++++++++------- docker/lwua-ingest/lwua-py/lwua/helpers.py | 5 +- docker/lwua-ingest/lwua-py/lwua/ingest.py | 29 +++++++---- docker/lwua-ingest/lwua-py/lwua/schedule.py | 4 +- docker/lwua-ingest/lwua-py/lwua/watcher.py | 14 +++-- .../lwua-ingest/lwua-py/tests/test_graphdb.py | 35 ++++++++++--- .../lwua-ingest/lwua-py/tests/test_ingest.py | 8 ++- .../lwua-ingest/lwua-py/tests/test_queries.py | 52 +++++++++++-------- 9 files changed, 120 insertions(+), 77 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/daemon.py b/docker/lwua-ingest/lwua-py/lwua/daemon.py index 8ea9d46..24055b8 100644 --- a/docker/lwua-ingest/lwua-py/lwua/daemon.py +++ b/docker/lwua-ingest/lwua-py/lwua/daemon.py @@ -102,9 +102,7 @@ def stop(self): pid = None if not pid: - message = ( - f"pidfile {self.pidfile} does not exist. Daemon not running?\n" - ) + message = f"pidfile {self.pidfile} does not exist. Daemon not running?\n" sys.stderr.write(message) return # not an error in a restart @@ -140,8 +138,7 @@ def run(self): def _usage(self): print( - f"run this daemon script with one argument == {'|'.join(Daemon.CMDS)}" - ) + f"run this daemon script with one argument == {'|'.join(Daemon.CMDS)}") def _cmd(self, argv): if len(argv) != 2: diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 25a03ba..54552ac 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -11,7 +11,7 @@ import os # from dotenv import load_dotenv -from .helpers import resolve_path # ,enable_logging +from .helpers import resolve_path # ,enable_logging from pyrdfj2 import J2RDFSyntaxBuilder log = logging.getLogger(__name__) @@ -37,8 +37,10 @@ def gdb_from_config(): GDB.method = "POST" return GDB + GDB = gdb_from_config() + def get_j2rdf_builder(): template_folder = resolve_path("./lwua/templates") log.info(f"template_folder == {template_folder}") @@ -46,15 +48,15 @@ def get_j2rdf_builder(): context = f"{URN_BASE}:ADMIN" j2rdf = J2RDFSyntaxBuilder( templates_folder=template_folder, - extra_functions = {"registry_of_lastmod_context": context} - ) + extra_functions={"registry_of_lastmod_context": context}, + ) return j2rdf + J2RDF = get_j2rdf_builder() -def update_registry_lastmod( - context: str, lastmod: datetime -): + +def update_registry_lastmod(context: str, lastmod: datetime): """ Update the administration of a context. 
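Most of this formatting commit is mechanical, but the graphdb.py hunks it touches are the heart of the module, so a compact reference helps. The module-level GDB client built by gdb_from_config() boils down to the following; the /repositories/<id> and /repositories/<id>/statements endpoint paths follow GraphDB's usual REST layout and are assumed here, since their construction lines fall outside the visible hunks:

    from SPARQLWrapper import SPARQLWrapper, JSON

    base, repoid = "http://localhost:7200", "lwua23"  # GDB_BASE / GDB_REPO defaults
    GDB = SPARQLWrapper(
        endpoint=f"{base}/repositories/{repoid}",
        updateEndpoint=f"{base}/repositories/{repoid}/statements",
        returnFormat=JSON,
        agent="lwua-python-sparql-client",
    )
    GDB.method = "POST"

    # smoke test against a running GraphDB: count all triples
    GDB.setQuery("SELECT (COUNT(*) AS ?n) WHERE { ?s ?p ?o }")
    print(GDB.query().convert()["results"]["bindings"][0]["n"]["value"])
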
@@ -64,9 +66,7 @@ def update_registry_lastmod( :type lastmod: str """ log.info(f"update registry_of_lastmod_context on {context}") - - - + # check if context is IRI compliant assert_iri_compliance(context) @@ -74,7 +74,7 @@ def update_registry_lastmod( template = "update_context_lastmod.sparql" vars = { "context": context, - "lastmod": lastmod.isoformat() if lastmod is not None else None + "lastmod": lastmod.isoformat() if lastmod is not None else None, } # get the sparql query query = J2RDF.build_syntax(template, **vars) @@ -85,7 +85,8 @@ def update_registry_lastmod( def assert_iri_compliance(context: str): - assert context.startswith(URN_BASE), f"Context {context} is not IRI compliant" + assert context.startswith( + URN_BASE), f"Context {context} is not IRI compliant" def insert_graph(graph: Graph, context: str = None): @@ -101,7 +102,6 @@ def insert_graph(graph: Graph, context: str = None): log.info(f"insert_graph into {context}") assert_context_exists(context) # Initialize the J2RDFSyntaxBuilder - assert_iri_compliance(context) if context is not None else None @@ -117,7 +117,7 @@ def insert_graph(graph: Graph, context: str = None): # Execute the query GDB.setQuery(query) GDB.query() - + def assert_context_exists(context: str): assert context is not None, "Context cannot be None" @@ -135,7 +135,6 @@ def delete_graph(context: str): assert_context_exists(context) # Initialize the J2RDFSyntaxBuilder - # check if context is IRI compliant assert_iri_compliance(context) if context is not None else None @@ -152,7 +151,11 @@ def delete_graph(context: str): GDB.query() -def ingest_graph(graph: Graph, lastmod:datetime, context: str, replace: bool = False): +def ingest_graph( + graph: Graph, + lastmod: datetime, + context: str, + replace: bool = False): """ Ingest a graph into a context. 
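The update_registry_lastmod() hunk above is easier to follow with the rendered SPARQL spelled out flat. A sketch of the DELETE/INSERT/WHERE shape produced by update_context_lastmod.sparql; the two PREFIX IRIs are assumptions, since the template's own PREFIX lines are not part of this hunk:

    from datetime import datetime

    ADMIN = "urn:lwua:INGEST:ADMIN"  # the registry context injected via extra_functions
    # assumed prefix IRIs; not shown in the visible template text
    PREFIXES = (
        "PREFIX schema: <https://schema.org/>\n"
        "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n"
    )

    def lastmod_update_sparql(context: str, lastmod: datetime = None) -> str:
        # mirror of update_context_lastmod.sparql: drop any old stamp unconditionally,
        # re-insert one only when a lastmod is given (None therefore unregisters)
        stamp = f"GRAPH <{ADMIN}> {{ <{context}> schema:dateModified ?date . }}"
        insert = ""
        if lastmod is not None:
            insert = (
                f"INSERT {{ GRAPH <{ADMIN}> {{ <{context}> schema:dateModified "
                f'"{lastmod.isoformat()}"^^xsd:dateTime . }} }}\n'
            )
        return (
            PREFIXES
            + f"DELETE {{ {stamp} }}\n"
            + insert
            + f"WHERE {{ OPTIONAL {{ {stamp} }} }}"
        )

    print(lastmod_update_sparql("urn:lwua:INGEST:project.ttl",
                                datetime(2023, 11, 22, 21, 19, 10)))
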
@@ -197,13 +200,15 @@ def get_registry_of_lastmod(): GDB.setQuery(query) GDB.setReturnFormat(JSON) results = GDB.query().convert() - + # convert {'head': {'vars': ['graph', 'lastmod']}, 'results': {'bindings': []}} to [{PosixPath('graph'): lastmod}] - # URI must be substracted from graph context and datetime str must be converted to epoch - + # URI must be substracted from graph context and datetime str must be + # converted to epoch + converted = {} return convert_results_registry_of_lastmod(results) + def convert_results_registry_of_lastmod(results): converted = {} for g in results["results"]["bindings"]: @@ -211,7 +216,7 @@ def convert_results_registry_of_lastmod(results): time = datetime.fromisoformat(g["lastmod"]["value"]) converted[path] = time return converted - + def suffix_2_format(suffix): if suffix in ["ttl", "turtle"]: diff --git a/docker/lwua-ingest/lwua-py/lwua/helpers.py b/docker/lwua-ingest/lwua-py/lwua/helpers.py index d4d0908..7c3c720 100644 --- a/docker/lwua-ingest/lwua-py/lwua/helpers.py +++ b/docker/lwua-ingest/lwua-py/lwua/helpers.py @@ -71,10 +71,7 @@ def getinstance(*args, **kwargs): def resolve_path(location: str, versus: str = "module"): location = location if location else "" - assert ( - versus in LOCATIONS - ), f"no base path available for coded versus = '{versus}'" + assert versus in LOCATIONS, f"no base path available for coded versus = '{versus}'" base: Path = LOCATIONS[versus] log.debug(f"resolve path base='{base}' + rel='{location}'") return Path(base, location).absolute() - diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 816de15..90ef1ed 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -12,12 +12,13 @@ delete_graph, ingest_graph, update_registry_lastmod, - read_graph + read_graph, ) log = logging.getLogger(__name__) + # functions here to ingest and delete files def fname_2_context(fname: str): """ @@ -31,12 +32,14 @@ def fname_2_context(fname: str): base = os.getenv("URN_BASE", "urn:lwua:INGEST") return f"{base}:{fname}" + def delete_data_file(fname): context = fname_2_context(fname) log.info(f"deleting {fname} from {context}") delete_graph(context) update_registry_lastmod(context, None) - + + def ingest_data_file(fname: str, lastmod: datetime, replace: bool = True): """ Ingest a data file. 
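The fname_2_context convention a few hunks up maps each data file onto its own named graph; a quick illustration with the default URN_BASE and made-up file names (PATCH 43 later moves this helper into graphdb.py and URL-quotes the filename so spaces and other special characters stay IRI-safe):

    assert fname_2_context("project.ttl") == "urn:lwua:INGEST:project.ttl"
    # nested paths ride along in the context IRI as-is at this stage
    assert fname_2_context("orgs/affiliates.ttl") == "urn:lwua:INGEST:orgs/affiliates.ttl"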
@@ -53,17 +56,18 @@ def ingest_data_file(fname: str, lastmod: datetime, replace: bool = True): context = fname_2_context(fname) log.info(f"ingesting {file_path} into {context} | replace : {replace}") ingest_graph(graph, lastmod, context=context, replace=replace) - + def data_path_from_config(): local_default = str(resolve_path("./data", versus="dotenv")) folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) return Path(folder_name).absolute() + def run_ingest(): data_path = data_path_from_config() log.info(f"run_ingest on updated files in {data_path}") - + # get the last context graph modification dates # run while true loop with 5 second sleep detector = FolderChangeDetector(data_path) @@ -76,15 +80,17 @@ def run_ingest(): log.exception(e) log.info("reporting changes") - last_mod = detector.report_changes(ingestor,last_mod) + last_mod = detector.report_changes(ingestor, last_mod) log.info(f"last_mod == {last_mod}") - + + class IngestChangeObserver(FolderChangeObserver): def __init__(self): pass def removed(self, fname): - # Implement the deletion of graph context and update of lastmod registry + # Implement the deletion of graph context and update of lastmod + # registry log.info(f"File {fname} has been deleted") delete_data_file(fname) @@ -92,12 +98,13 @@ def added(self, fname, lastmod): # Implement the addition of graph in context log.info(f"File {fname} has been added") ingest_data_file(fname, lastmod) - + def changed(self, fname, lastmod): - # Implement the replacement of graph in context and update the lastmod registry + # Implement the replacement of graph in context and update the lastmod + # registry log.info(f"File {fname} has been modified") - ingest_data_file(fname,lastmod, True) - + ingest_data_file(fname, lastmod, True) + # Note: this main method allows to locally test outside docker # directly connecting to a localhost graphdb endpoint (which might be diff --git a/docker/lwua-ingest/lwua-py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py index fcf68ee..2ecbf77 100644 --- a/docker/lwua-ingest/lwua-py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -20,11 +20,11 @@ class LWUAScheduler(BlockingScheduler): def __init__(self, run_on_start: bool = True): time_delta = os.getenv("SCHEDULER_DELTA", "30") timeprops: dict = dict(seconds=int(time_delta)) - + # get the waittime before starting the scheduler waittime = os.getenv("SCHEDULER_WAIT", "0") time.sleep(int(waittime)) - + super().__init__() self._run_on_start = run_on_start self.add_job(lambda: main_schedule(), "interval", **timeprops) diff --git a/docker/lwua-ingest/lwua-py/lwua/watcher.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py index 5ac78d1..2e7a332 100644 --- a/docker/lwua-ingest/lwua-py/lwua/watcher.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -9,6 +9,7 @@ log = logging.getLogger(__name__) + class FolderChangeObserver(ABC): @abstractmethod def added(self, fname: str, lastmod: datetime = None): @@ -22,6 +23,7 @@ def removed(self, fname: str): def changed(self, fname: str, lastmod: datetime = None): pass + class FolderChangeDetector: def __init__(self, folder_to_inspect): self.root = Path(folder_to_inspect) @@ -30,8 +32,12 @@ def __init__(self, folder_to_inspect): time.sleep(1) log.info(f"Watching {self.root}") - def report_changes(self, observer,known_lastmod_by_fname: dict = {}): - current_lastmod_by_fname = {p: datetime.utcfromtimestamp(os.path.getmtime(p)) for p in self.root.glob('**/*') if p.is_file()} + def report_changes(self, observer, known_lastmod_by_fname: 
dict = {}): + current_lastmod_by_fname = { + p: datetime.utcfromtimestamp(os.path.getmtime(p)) + for p in self.root.glob("**/*") + if p.is_file() + } log.info(f"current_lastmod_by_fname: {current_lastmod_by_fname}") for fname in known_lastmod_by_fname: if fname not in current_lastmod_by_fname: @@ -42,9 +48,9 @@ def report_changes(self, observer,known_lastmod_by_fname: dict = {}): observer.added(fname, lastmod) elif lastmod > known_lastmod_by_fname[fname]: observer.changed(fname, lastmod) - + return current_lastmod_by_fname - + # test the watcher on local file system - not in docker if __name__ == "__main__": diff --git a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py index 1a747e2..964e014 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py +++ b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py @@ -1,13 +1,26 @@ import pytest from datetime import datetime from pathlib import Path -from lwua.graphdb import context_2_fname, suffix_2_format, read_graph, convert_results_registry_of_lastmod # replace 'your_module_path' with the actual module path +from lwua.graphdb import ( + context_2_fname, + suffix_2_format, + read_graph, + convert_results_registry_of_lastmod, +) # replace 'your_module_path' with the actual module path from lwua.ingest import data_path_from_config results = { - "head": {"vars": ["graph", "lastmod"]}, - "results": {"bindings": [{"graph": {"value": "urn:lwua:INGEST:test_file.txt"}, "lastmod": {"value": "2022-01-01T00:00:00"}}]} - } + "head": {"vars": ["graph", "lastmod"]}, + "results": { + "bindings": [ + { + "graph": {"value": "urn:lwua:INGEST:test_file.txt"}, + "lastmod": {"value": "2022-01-01T00:00:00"}, + } + ] + }, +} + def test_context_2_fname(): # Act @@ -15,7 +28,8 @@ def test_context_2_fname(): # Assert assert isinstance(converted, Path) - + + def get_registry_of_lastmod(results): # Act converted = convert_results_registry_of_lastmod(results) @@ -23,8 +37,11 @@ def get_registry_of_lastmod(results): # Assert assert isinstance(converted, dict) assert len(converted) == 1 - assert converted[Path("test_file.txt")] == datetime.fromisoformat("2022-01-01T00:00:00") - + assert converted[Path("test_file.txt")] == datetime.fromisoformat( + "2022-01-01T00:00:00" + ) + + def test_suffix_2_format(): # Arrange suffixes = ["ttl", "turtle", "jsonld", "json", "other"] @@ -35,6 +52,7 @@ def test_suffix_2_format(): # Assert assert results == ["turtle", "turtle", "json-ld", "json-ld", None] + def test_read_graph(): # Arrange fpath = data_path_from_config() / "project.ttl" # replace with a test file path @@ -45,4 +63,5 @@ def test_read_graph(): # Assert assert graph is not None - # Add more assertions based on what the read_graph function is supposed to do \ No newline at end of file + # Add more assertions based on what the read_graph function is supposed to + # do diff --git a/docker/lwua-ingest/lwua-py/tests/test_ingest.py b/docker/lwua-ingest/lwua-py/tests/test_ingest.py index e2ea99a..b67881a 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_ingest.py +++ b/docker/lwua-ingest/lwua-py/tests/test_ingest.py @@ -3,6 +3,7 @@ import os from lwua.ingest import fname_2_context, data_path_from_config + def test_fname_2_context(): # Arrange fname = "test_file.txt" # replace with a test file name @@ -14,5 +15,8 @@ def test_fname_2_context(): # Assert # Check the results # This depends on what the function does - # For example, if the function returns a string based on the file name, you can check if the string is correct - assert 
result == "urn:lwua:INGEST:test_file.txt", f"Expected 'expected_result', but got '{result}'" + # For example, if the function returns a string based on the file name, + # you can check if the string is correct + assert ( + result == "urn:lwua:INGEST:test_file.txt" + ), f"Expected 'expected_result', but got '{result}'" diff --git a/docker/lwua-ingest/lwua-py/tests/test_queries.py b/docker/lwua-ingest/lwua-py/tests/test_queries.py index bc16173..fc6efe2 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_queries.py +++ b/docker/lwua-ingest/lwua-py/tests/test_queries.py @@ -6,18 +6,23 @@ URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") + def get_j2rdf_builder(): - template_folder = os.path.join(os.path.dirname(__file__), "../lwua/templates") + template_folder = os.path.join( + os.path.dirname(__file__), + "../lwua/templates") # init J2RDFSyntaxBuilder context = f"{URN_BASE}:ADMIN" j2rdf = J2RDFSyntaxBuilder( templates_folder=template_folder, - extra_functions = {"registry_of_lastmod_context": context} - ) + extra_functions={"registry_of_lastmod_context": context}, + ) return j2rdf + J2RDF = get_j2rdf_builder() + def test_template_insert_graph(): # Arrange template = "insert_graph.sparql" @@ -28,15 +33,16 @@ def test_template_insert_graph(): # Act query = J2RDF.build_syntax(template, **vars) - - #clean up the query by removing the newlines + + # clean up the query by removing the newlines query = query.replace("\n", "") - + print(query) to_expect = "INSERT DATA { GRAPH { . } }" # Assert assert query == to_expect, f"Expected '{to_expect}', but got '{query}'" - + + def test_template_delete_graph(): # Arrange template = "delete_graph.sparql" @@ -46,15 +52,16 @@ def test_template_delete_graph(): # Act query = J2RDF.build_syntax(template, **vars) - - #clean up the query by removing the newlines + + # clean up the query by removing the newlines query = query.replace("\n", "") - + print(query) to_expect = "DELETE WHERE { GRAPH { ?s ?p ?o }}" # Assert assert query == to_expect, f"Expected '{to_expect}', but got '{query}'" - + + def test_template_update_context_lastmod(): # Arrange template = "update_context_lastmod.sparql" @@ -65,18 +72,19 @@ def test_template_update_context_lastmod(): # Act query = J2RDF.build_syntax(template, **vars) - - #clean up the query by removing the newlines + + # clean up the query by removing the newlines query = query.replace("\n", "") - #replace all spaces with nothing + # replace all spaces with nothing query = query.replace(" ", "") - + to_expect = 'PREFIX schema: DELETE { GRAPH { schema:dateModified ?date . }}INSERT { GRAPH { schema:dateModified "2022-01-01T00:00:00"^^xsd:dateTime . } }WHERE { OPTIONAL { GRAPH { schema:dateModified ?date . 
} }}'
    # replace all spaces with nothing
    to_expect = to_expect.replace(" ", "")
    # Assert
    assert query == to_expect, f"Expected '{to_expect}', but got '{query}'"
-
+
+
 def test_template_lastmod_info():
     # Arrange
     template = "lastmod_info.sparql"
@@ -86,14 +94,14 @@ def test_template_lastmod_info():
 
     # Act
     query = J2RDF.build_syntax(template, **vars)
-
-    #clean up the query by removing the newlines
+
+    # clean up the query by removing the newlines
     query = query.replace("\n", "")
-    #replace all spaces with nothing
+    # replace all spaces with nothing
     query = query.replace(" ", "")
-
-    to_expect = 'SELECT ?graph ?lastmod WHERE { GRAPH { ?graph ?lastmod }}'
+
+    to_expect = "SELECT ?graph ?lastmod WHERE { GRAPH { ?graph ?lastmod }}"
     # replace all spaces with nothing
     to_expect = to_expect.replace(" ", "")
     # Assert
-    assert query == to_expect, f"Expected '{to_expect}', but got '{query}'"
\ No newline at end of file
+    assert query == to_expect, f"Expected '{to_expect}', but got '{query}'"

From d982426413894035ceb834db4442f565f67f09c1 Mon Sep 17 00:00:00 2001
From: cedricdcc <30471340+cedricdcc@users.noreply.github.com>
Date: Wed, 29 Nov 2023 09:10:05 +0100
Subject: [PATCH 43/60] last refactoring mods

---
 data/project.ttl                              |  2 +-
 docker/lwua-ingest/lwua-py/lwua/graphdb.py    | 26 ++++++++--
 docker/lwua-ingest/lwua-py/lwua/ingest.py     | 52 ++++++++-----------
 docker/lwua-ingest/lwua-py/lwua/schedule.py   | 23 ++++----
 .../templates/update_context_lastmod.sparql   | 18 ++-----
 docker/lwua-ingest/lwua-py/lwua/watcher.py    |  2 +-
 dotenv-example                                |  2 +-
 7 files changed, 61 insertions(+), 64 deletions(-)

diff --git a/data/project.ttl b/data/project.ttl
index 2176f68..3ed9d7a 100644
--- a/data/project.ttl
+++ b/data/project.ttl
@@ -16,7 +16,7 @@
 schema:name "Laurian van Maldeghem"^^xsd:string .
 
  a schema:Person ;
-    schema:name "Alain Proviste"^^xsd:string .
+    schema:name "Alain Provist"^^xsd:string .
 
  a schema:Person ;
     schema:name "Marc Portier"^^xsd:string .
diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py
index 54552ac..192d756 100644
--- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py
+++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py
@@ -5,6 +5,7 @@
 from SPARQLWrapper import SPARQLWrapper, JSON
 from datetime import datetime
 from pathlib import Path
+from urllib.parse import unquote, quote
 import logging
 from rdflib import Graph
 import time
@@ -74,11 +75,11 @@ def update_registry_lastmod(context: str, lastmod: datetime):
     template = "update_context_lastmod.sparql"
     vars = {
         "context": context,
-        "lastmod": lastmod.isoformat() if lastmod is not None else None,
+        "lastmod": lastmod if lastmod is not None else None,
     }
     # get the sparql query
     query = J2RDF.build_syntax(template, **vars)
-    # log.debug(f"update_registry_lastmod query == {query}")
+    log.debug(f"update_registry_lastmod query == {query}")
     # execute the query
     GDB.setQuery(query)
     GDB.query()
@@ -187,7 +188,21 @@ def context_2_fname(context: str):
     :return: The filename corresponding to the context.
     :rtype: str
     """
-    return Path(context.replace(f"{URN_BASE}:", ""))
+    assert context.startswith(URN_BASE), f"Context {context} is not IRI compliant"
+    return unquote(context[len(URN_BASE) + 1:])
+
+def fname_2_context(fname: str):
+    """
+    Convert a filename to a context.
+
+    :param fname: The filename to convert.
+    :type fname: str
+    :return: The context corresponding to the filename.
+ :rtype: str + """ + base = os.getenv("URN_BASE", "urn:lwua:INGEST") + fname = str(fname) + return f"{base}:{quote(fname)}" def get_registry_of_lastmod(): @@ -218,7 +233,8 @@ def convert_results_registry_of_lastmod(results): return converted -def suffix_2_format(suffix): +def format_from_filepath(fpath:Path): + suffix = fpath.suffix[1:].lower() if suffix in ["ttl", "turtle"]: return "turtle" if suffix in ["jsonld", "json"]: @@ -228,6 +244,6 @@ def suffix_2_format(suffix): def read_graph(fpath: Path, format: str = None): - format = format or suffix_2_format(fpath.suffix) + format = format or format_from_filepath(fpath) graph: Graph = Graph().parse(location=str(fpath), format=format) return graph diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index 90ef1ed..ed44444 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -13,6 +13,7 @@ ingest_graph, update_registry_lastmod, read_graph, + fname_2_context ) @@ -20,18 +21,6 @@ # functions here to ingest and delete files -def fname_2_context(fname: str): - """ - Convert a filename to a context. - - :param fname: The filename to convert. - :type fname: str - :return: The context corresponding to the filename. - :rtype: str - """ - base = os.getenv("URN_BASE", "urn:lwua:INGEST") - return f"{base}:{fname}" - def delete_data_file(fname): context = fname_2_context(fname) @@ -63,25 +52,26 @@ def data_path_from_config(): folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) return Path(folder_name).absolute() - -def run_ingest(): - data_path = data_path_from_config() - log.info(f"run_ingest on updated files in {data_path}") - - # get the last context graph modification dates - # run while true loop with 5 second sleep - detector = FolderChangeDetector(data_path) - ingestor = IngestChangeObserver() - last_mod = {} - try: - last_mod = get_registry_of_lastmod() - log.info(f"initial last mod == {last_mod}") - except Exception as e: - log.exception(e) - - log.info("reporting changes") - last_mod = detector.report_changes(ingestor, last_mod) - log.info(f"last_mod == {last_mod}") +class Ingester: + def __init__(self): + data_path = data_path_from_config() + log.info(f"run_ingest on updated files in {data_path}") + + # get the last context graph modification dates + # run while true loop with 5 second sleep + self.detector = FolderChangeDetector(data_path) + self.ingestor = IngestChangeObserver() + + def run_ingest(self): + last_mod = {} + try: + last_mod = get_registry_of_lastmod() + log.info(f"initial last mod == {last_mod}") + log.info("reporting changes") + self.detector.report_changes(self.ingestor, last_mod) + log.info(f"last_mod == {last_mod}") + except Exception as e: + log.exception(e) class IngestChangeObserver(FolderChangeObserver): diff --git a/docker/lwua-ingest/lwua-py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py index 2ecbf77..c2c2378 100644 --- a/docker/lwua-ingest/lwua-py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -4,35 +4,36 @@ import time import os from apscheduler.schedulers.blocking import BlockingScheduler -from .ingest import run_ingest +from .ingest import Ingester log = logging.getLogger(__name__) -def main_schedule(): - log.info("starting main service flow") - run_ingest() - - # https://apscheduler.readthedocs.io/en/3.x/userguide.html class LWUAScheduler(BlockingScheduler): def __init__(self, run_on_start: bool = True): - time_delta = os.getenv("SCHEDULER_DELTA", "30") + time_delta = 
os.getenv("SCHEDULER_PERIOD", "300") timeprops: dict = dict(seconds=int(time_delta)) # get the waittime before starting the scheduler - waittime = os.getenv("SCHEDULER_WAIT", "0") + waittime = os.getenv("SCHEDULER_WAIT", "10") time.sleep(int(waittime)) - + super().__init__() self._run_on_start = run_on_start - self.add_job(lambda: main_schedule(), "interval", **timeprops) + self.add_job(lambda: self.main_schedule(), "interval", **timeprops) def start(self): try: + self.ingester = Ingester() if self._run_on_start: - main_schedule() + self.main_schedule() super().start() except (KeyboardInterrupt, SystemExit): log.info("execution interrupted") + + def main_schedule(self): + log.info("starting main service flow") + self.ingester.run_ingest() + diff --git a/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql index 143f4dc..60f6ae5 100644 --- a/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql +++ b/docker/lwua-ingest/lwua-py/lwua/templates/update_context_lastmod.sparql @@ -9,27 +9,17 @@ #} PREFIX schema: +WITH <{{ registry_of_lastmod_context }}> DELETE { - GRAPH <{{ registry_of_lastmod_context }}> { <{{ context }}> schema:dateModified ?date . - } } INSERT { {% if lastmod %} - GRAPH <{{ registry_of_lastmod_context }}> { - {# - pyrdf2j here is a hack to convert a Python datetime object to an XSD dateTime literal. - <{{ context }}> schema:dateModified "{{ lastmod | xsd('xsd:datetime') }} . - - #} - <{{ context }}> schema:dateModified "{{ lastmod }}"^^xsd:dateTime . - } + <{{ context }}> schema:dateModified {{ lastmod | xsd('xsd:datetime') }} . {% endif %} } -WHERE { - OPTIONAL { - GRAPH <{{ registry_of_lastmod_context }}> { +WHERE { + OPTIONAL { {# Optional wrapper is needed because if no date is found no insert will be executed #} <{{ context }}> schema:dateModified ?date . 
} - } } \ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/lwua/watcher.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py index 2e7a332..18eb08a 100644 --- a/docker/lwua-ingest/lwua-py/lwua/watcher.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -34,7 +34,7 @@ def __init__(self, folder_to_inspect): def report_changes(self, observer, known_lastmod_by_fname: dict = {}): current_lastmod_by_fname = { - p: datetime.utcfromtimestamp(os.path.getmtime(p)) + str(p): datetime.utcfromtimestamp(p.stat().st_mtime) for p in self.root.glob("**/*") if p.is_file() } diff --git a/dotenv-example b/dotenv-example index a19cf77..f1d979e 100644 --- a/dotenv-example +++ b/dotenv-example @@ -5,5 +5,5 @@ LOGCONF=debug-logconf.yml GDB_REPO="lwua23" # scheduler settings -SCHEDULER_DELTA=5 # seconds +SCHEDULER_PERIOD=5 # seconds SCHEDULER_WAIT=5 # seconds | this is needed because the graphdb container takes some time to start, else the first run will be an error From c6246c8253d8b19b3cca138902be9ca462135a21 Mon Sep 17 00:00:00 2001 From: cedricdcc Date: Wed, 29 Nov 2023 08:10:32 +0000 Subject: [PATCH 44/60] Automated code formatting --- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 6 ++++-- docker/lwua-ingest/lwua-py/lwua/ingest.py | 4 +++- docker/lwua-ingest/lwua-py/lwua/schedule.py | 7 +++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 192d756..07615f0 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -188,9 +188,11 @@ def context_2_fname(context: str): :return: The filename corresponding to the context. :rtype: str """ - assert context.startswith(URN_BASE), f"Context {context} is not IRI compliant" + assert context.startswith( + URN_BASE), f"Context {context} is not IRI compliant" return unquote(context[len(URN_BASE) + 1:]) + def fname_2_context(fname: str): """ Convert a filename to a context. 
@@ -233,7 +235,7 @@ def convert_results_registry_of_lastmod(results): return converted -def format_from_filepath(fpath:Path): +def format_from_filepath(fpath: Path): suffix = fpath.suffix[1:].lower() if suffix in ["ttl", "turtle"]: return "turtle" diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index ed44444..f1c3fb4 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -13,7 +13,7 @@ ingest_graph, update_registry_lastmod, read_graph, - fname_2_context + fname_2_context, ) @@ -22,6 +22,7 @@ # functions here to ingest and delete files + def delete_data_file(fname): context = fname_2_context(fname) log.info(f"deleting {fname} from {context}") @@ -52,6 +53,7 @@ def data_path_from_config(): folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) return Path(folder_name).absolute() + class Ingester: def __init__(self): data_path = data_path_from_config() diff --git a/docker/lwua-ingest/lwua-py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py index c2c2378..eb67cba 100644 --- a/docker/lwua-ingest/lwua-py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -19,21 +19,20 @@ def __init__(self, run_on_start: bool = True): # get the waittime before starting the scheduler waittime = os.getenv("SCHEDULER_WAIT", "10") time.sleep(int(waittime)) - + super().__init__() self._run_on_start = run_on_start self.add_job(lambda: self.main_schedule(), "interval", **timeprops) def start(self): try: - self.ingester = Ingester() + self.ingester = Ingester() if self._run_on_start: self.main_schedule() super().start() except (KeyboardInterrupt, SystemExit): log.info("execution interrupted") - + def main_schedule(self): log.info("starting main service flow") self.ingester.run_ingest() - From 9375895991c83ba298512e8bc1f87d2e844e3bed Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Wed, 29 Nov 2023 14:48:33 +0100 Subject: [PATCH 45/60] added beginning of dereferencer --- data/project.ttl | 5 +- docker/dereferencer/Dockerfile | 38 ++ .../dereferencer-py/debug-logconf.yml | 35 ++ .../dereferencer-py/dereferencer/__init__.py | 4 + .../dereferencer-py/dereferencer/__main__.py | 45 ++ .../dereferencer-py/dereferencer/daemon.py | 152 ++++++ .../dereferencer/dereference.py | 17 + .../dereferencer-py/dereferencer/helpers.py | 77 +++ .../dereferencer-py/dereferencer/schedule.py | 38 ++ docker/dereferencer/dereferencer-py/logging | 1 + .../dereferencer/dereferencer-py/poetry.lock | 438 ++++++++++++++++++ .../dereferencer-py/pyproject.toml | 23 + docker/docker-compose.yml | 33 ++ 13 files changed, 902 insertions(+), 4 deletions(-) create mode 100644 docker/dereferencer/Dockerfile create mode 100644 docker/dereferencer/dereferencer-py/debug-logconf.yml create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/__init__.py create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/__main__.py create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/daemon.py create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/dereference.py create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/helpers.py create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/schedule.py create mode 100644 docker/dereferencer/dereferencer-py/logging create mode 100644 docker/dereferencer/dereferencer-py/poetry.lock create mode 100644 docker/dereferencer/dereferencer-py/pyproject.toml diff --git a/data/project.ttl 
b/data/project.ttl index 3ed9d7a..7c862d0 100644 --- a/data/project.ttl +++ b/data/project.ttl @@ -15,11 +15,8 @@ a schema:Person ; schema:name "Laurian van Maldeghem"^^xsd:string . - a schema:Person ; - schema:name "Alain Provist"^^xsd:string . - a schema:Person ; - schema:name "Marc Portier"^^xsd:string . + schema:name "Marque Portier"^^xsd:string . a schema:Organization ; schema:name "VLIZ vzw"^^xsd:string . diff --git a/docker/dereferencer/Dockerfile b/docker/dereferencer/Dockerfile new file mode 100644 index 0000000..7891672 --- /dev/null +++ b/docker/dereferencer/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.10-slim as python +VOLUME /data +VOLUME /logging +WORKDIR /dereferencer-py +ENV PYTHONUNBUFFERED=true + +# check for inspiration on including poetry +# - https://hub.docker.com/r/airdock/python-poetry +# - https://binx.io/nl/2022/06/13/poetry-docker/ + + +# create an image version point where poetry and its dependencies are available +# and use that to build the python package locally +FROM python as poetry +# gcc needed in the build of many python dependencies +# removed from python-slim for size trimming - but have to re-add here +RUN apt-get update -y && apt-get upgrade -y && apt-get install -y gcc +ENV POETRY_HOME=/opt/poetry +ENV POETRY_VIRTUALENVS_IN_PROJECT=true +ENV PATH="$POETRY_HOME/bin:$PATH" +RUN python -c 'from urllib.request import urlopen; print(urlopen("https://install.python-poetry.org").read().decode())' | python - + +# get the source code in +COPY ./dereferencer-py /dereferencer-py +# use poetry to build and install -- creating the local .venv +RUN poetry install --no-interaction --no-ansi -vvv + + +# now go back to the original slim image to build the runtime image +# and just grab the build env from the intermediate stage +FROM python as runtime +# ensure the .venv python is used +ENV PATH="/dereferencer-py/.venv/bin:$PATH" +# ensure we have the build folder from the poetry stage of this image +COPY --from=poetry /dereferencer-py /dereferencer-py + +RUN rm -f /dereferencer-py/logging && ln -s /logging /dereferencer-py/logging +ENTRYPOINT ["python", "-m", "dereferencer"] \ No newline at end of file diff --git a/docker/dereferencer/dereferencer-py/debug-logconf.yml b/docker/dereferencer/dereferencer-py/debug-logconf.yml new file mode 100644 index 0000000..1ee4e9e --- /dev/null +++ b/docker/dereferencer/dereferencer-py/debug-logconf.yml @@ -0,0 +1,35 @@ +# new yaml format for python logging config +# see https://docs.python.org/3/library/logging.config.html for description of dictConfig +version: 1 +formatters: + base: + format: '%(asctime)-18s @%(name)-23s [%(levelname)-8s] %(message)s' + datefmt: '%Y-%m-%d %H:%M:%S' +handlers: + stderr: + class: logging.StreamHandler + level: DEBUG + formatter: base + stream: ext://sys.stderr + file: + class: logging.FileHandler + level: DEBUG + formatter: base + filename: logging/dereferencer-debug.log + mode: 'a' # in stead of 'w' +loggers: + __main__: + level: DEBUG + propagate: yes + tests: + level: DEBUG + propagate: yes + lwua: + level: DEBUG + propagate: yes + apscheduler: + level: WARN + propagate: yes +root: + level: DEBUG + handlers: [stderr, file] diff --git a/docker/dereferencer/dereferencer-py/dereferencer/__init__.py b/docker/dereferencer/dereferencer-py/dereferencer/__init__.py new file mode 100644 index 0000000..5f11648 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/__init__.py @@ -0,0 +1,4 @@ +__all__ = ["DereferenceScheduler"] + +# Add scheduler path here +from .schedule import 
DereferenceScheduler \ No newline at end of file diff --git a/docker/dereferencer/dereferencer-py/dereferencer/__main__.py b/docker/dereferencer/dereferencer-py/dereferencer/__main__.py new file mode 100644 index 0000000..ac12b2e --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/__main__.py @@ -0,0 +1,45 @@ +""" LWUAIngest service entry-point +-- support service-like commands start/stop/status/reload +-- will start the cron-service-dispatch and the web UI endpoint +""" +from .daemon import Daemon +from .helpers import enable_logging, resolve_path +from dotenv import load_dotenv +from dereferencer import DereferenceScheduler +import sys +import logging + +log = logging.getLogger(__name__) + + +class IngestDaemon(Daemon): + def run(self): + try: + # setup + log.info("setting up") + scheduler: DereferenceScheduler = DereferenceScheduler() + + # action + log.info("starting schedule") + scheduler.start() + + except Exception as e: + log.exception(e) + finally: + # teardown + log.info("teardown") + + +def main(): + load_dotenv() + enable_logging() + + pidfilename: str = "dereferencer-daemon.pid" + # double dirname ends at parent! + pidfile: str = resolve_path(pidfilename) + + IngestDaemon(pidfile)._cmd(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/docker/dereferencer/dereferencer-py/dereferencer/daemon.py b/docker/dereferencer/dereferencer-py/dereferencer/daemon.py new file mode 100644 index 0000000..24055b8 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/daemon.py @@ -0,0 +1,152 @@ +""" Deamon wrapper script +--> https://stackoverflow.com/questions/1603109/how-to-make-a-python-script-run-like-a-service-or-daemon-in-linux +--> https://web.archive.org/web/20160320091458/http://www.jejik.com/files/examples/daemon3x.py +""" +import sys +import os +import time +import atexit +import signal +from abc import ABC, abstractmethod +import logging + + +log = logging.getLogger(__name__) + + +class Daemon(ABC): + """A generic daemon class. + Usage: subclass the daemon class and override the run() method. + """ + + def __init__(self, pidfile): + self.pidfile = str(pidfile) + + def daemonize(self): + """Deamonize class. UNIX double fork mechanism.""" + + try: + pid = os.fork() + if pid > 0: + # exit first parent + sys.exit(0) + except OSError as err: + log.exception(f"fork #1 failed: {err}") + sys.exit(1) + + # decouple from parent environment + os.chdir("/") + os.setsid() + os.umask(0) + + # do second fork + try: + pid = os.fork() + if pid > 0: + # exit from second parent + sys.exit(0) + except OSError as err: + log.exception(f"fork #2 failed: {err}") + sys.exit(1) + + # redirect standard file descriptors + sys.stdout.flush() + sys.stderr.flush() + si = open(os.devnull, "r") + so = open(os.devnull, "a+") + se = open(os.devnull, "a+") + + os.dup2(si.fileno(), sys.stdin.fileno()) + os.dup2(so.fileno(), sys.stdout.fileno()) + os.dup2(se.fileno(), sys.stderr.fileno()) + + # write pidfile + atexit.register(self.delpid) + + pid = str(os.getpid()) + log.info(f"creating pidfile {self.pidfile} - containing {pid}") + with open(self.pidfile, "w+") as f: + f.write(pid + "\n") + + def delpid(self): + log.info("atexit -- service run completed -- removing pidfile") + os.remove(self.pidfile) + + def start(self): + """Start the daemon.""" + + # Check for a pidfile to see if the daemon already runs + try: + with open(self.pidfile, "r") as pf: + pid = int(pf.read().strip()) + except IOError: + pid = None + + if pid: + message = f"pidfile {self.pidfile} already exist. 
Daemon already running?\n" + sys.stderr.write(message) + sys.exit(1) + + # Start the daemon + self.daemonize() + self.run() + + def stop(self): + """Stop the daemon.""" + + # Get the pid from the pidfile + try: + with open(self.pidfile, "r") as pf: + pid = int(pf.read().strip()) + except IOError: + pid = None + + if not pid: + message = f"pidfile {self.pidfile} does not exist. Daemon not running?\n" + sys.stderr.write(message) + return # not an error in a restart + + # Try killing the daemon process + + try: + while True: + os.kill(pid, signal.SIGTERM) + time.sleep(0.1) + except OSError as err: + e = str(err.args) + if e.find("No such process") > 0: + if os.path.exists(self.pidfile): + os.remove(self.pidfile) + else: + print(str(err.args)) + sys.exit(1) + + def restart(self): + """Restart the daemon.""" + self.stop() + self.start() + + @abstractmethod + def run(self): + """You should override this method when you subclass Daemon. + + It will be called after the process has been daemonized by + start() or restart(). + """ + + CMDS = ["start", "stop", "restart", "run"] + + def _usage(self): + print( + f"run this daemon script with one argument == {'|'.join(Daemon.CMDS)}") + + def _cmd(self, argv): + if len(argv) != 2: + log.warning(f"daemon started with cmdline ==> {argv}") + return self._usage() + # else + cmd = argv[1] + if cmd not in Daemon.CMDS: + return self._usage() + # else + self.__getattribute__(cmd)() diff --git a/docker/dereferencer/dereferencer-py/dereferencer/dereference.py b/docker/dereferencer/dereferencer-py/dereferencer/dereference.py new file mode 100644 index 0000000..2d3d444 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/dereference.py @@ -0,0 +1,17 @@ +import logging +import time +from dotenv import load_dotenv +from datetime import datetime +from SPARQLWrapper import SPARQLWrapper, JSON +import os +from pathlib import Path +from .helpers import enable_logging, resolve_path + +log = logging.getLogger(__name__) + +class Dereference: + def __init__(self): + pass + + def run_dereference(self): + log.info("running dereference") \ No newline at end of file diff --git a/docker/dereferencer/dereferencer-py/dereferencer/helpers.py b/docker/dereferencer/dereferencer-py/dereferencer/helpers.py new file mode 100644 index 0000000..7c3c720 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/helpers.py @@ -0,0 +1,77 @@ +import yaml +import logging +import logging.config +import os +from pathlib import Path +from dotenv import find_dotenv + + +log = logging.getLogger(__name__) + + +def yaml_load_file(file): + if file is None: + log.debug("can not load unspecified yaml file") + return None + # else + try: + with open(file, "r") as yml_file: + return yaml.load(yml_file, Loader=yaml.SafeLoader) + except Exception as e: + log.exception(e) + return dict() + + +def find_logconf(logconf): + if logconf is None or logconf == "": + return None + for vs in ["dotenv", "module", "work"]: # try in this order + logconf_path = resolve_path(logconf, versus=vs) + print(f"trying vs {vs} --> {logconf_path} ?") + if logconf_path.exists(): + return logconf_path + # else + raise Exception( + f"config error logconf file {logconf} not found relative to dotenv, module or pwd" + ) + + +def enable_logging(logconf: str = None): + """Configures logging based on logconf specified through .env ${LOGCONF}""" + logconf = os.getenv("LOGCONF") if logconf is None else logconf + logconf_path = find_logconf(logconf) + if logconf_path is None: + log.info("No logging config found.") + 
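    # no logconf found: leave logging on the library defaults and just return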
return + # else + logconf = str(logconf_path) + logging.config.dictConfig(yaml_load_file(logconf)) + log.info(f"Logging enabled according to config in {logconf}") + + +def singleton(class_): + """Decorator for singleton classes""" + instances = {} + + def getinstance(*args, **kwargs): + if class_ not in instances: + instances[class_] = class_(*args, **kwargs) + return instances[class_] + + return getinstance + + +LOCATIONS: dict[str, Path] = dict( + work=Path().cwd(), + helpers=Path(__file__).parent.absolute(), + module=Path(__file__).parent.parent.absolute(), + dotenv=Path(find_dotenv()).parent, +) + + +def resolve_path(location: str, versus: str = "module"): + location = location if location else "" + assert versus in LOCATIONS, f"no base path available for coded versus = '{versus}'" + base: Path = LOCATIONS[versus] + log.debug(f"resolve path base='{base}' + rel='{location}'") + return Path(base, location).absolute() diff --git a/docker/dereferencer/dereferencer-py/dereferencer/schedule.py b/docker/dereferencer/dereferencer-py/dereferencer/schedule.py new file mode 100644 index 0000000..5d4d2c4 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/schedule.py @@ -0,0 +1,38 @@ +""" Scheduling the regular running functions of the main workflow (ingest / process / store) +""" +import logging +import time +import os +from apscheduler.schedulers.blocking import BlockingScheduler +from .dereference import Dereference + + +log = logging.getLogger(__name__) + + +# https://apscheduler.readthedocs.io/en/3.x/userguide.html +class DereferenceScheduler(BlockingScheduler): + def __init__(self, run_on_start: bool = True): + time_delta = os.getenv("SCHEDULER_PERIOD", "300") + timeprops: dict = dict(seconds=int(time_delta)) + + # get the waittime before starting the scheduler + waittime = os.getenv("SCHEDULER_WAIT", "10") + time.sleep(int(waittime)) + + super().__init__() + self._run_on_start = run_on_start + self.add_job(lambda: self.main_schedule(), "interval", **timeprops) + + def start(self): + try: + self.dereferencer = Dereference() + if self._run_on_start: + self.main_schedule() + super().start() + except (KeyboardInterrupt, SystemExit): + log.info("execution interrupted") + + def main_schedule(self): + log.info("starting main service flow") + self.dereferencer.run_dereference() diff --git a/docker/dereferencer/dereferencer-py/logging b/docker/dereferencer/dereferencer-py/logging new file mode 100644 index 0000000..42ff418 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/logging @@ -0,0 +1 @@ +../../../logging/ \ No newline at end of file diff --git a/docker/dereferencer/dereferencer-py/poetry.lock b/docker/dereferencer/dereferencer-py/poetry.lock new file mode 100644 index 0000000..2397eb1 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/poetry.lock @@ -0,0 +1,438 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
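# (the hashes below pin every dependency; the `poetry install` step in the Dockerfile consumes this lock as-is, so builds stay reproducible)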
+ +[[package]] +name = "apscheduler" +version = "3.10.4" +description = "In-process task scheduler with Cron-like capabilities" +optional = false +python-versions = ">=3.6" +files = [ + {file = "APScheduler-3.10.4-py3-none-any.whl", hash = "sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661"}, + {file = "APScheduler-3.10.4.tar.gz", hash = "sha256:e6df071b27d9be898e486bc7940a7be50b4af2e9da7c08f0744a96d4bd4cef4a"}, +] + +[package.dependencies] +pytz = "*" +six = ">=1.4.0" +tzlocal = ">=2.0,<3.dev0 || >=4.dev0" + +[package.extras] +doc = ["sphinx", "sphinx-rtd-theme"] +gevent = ["gevent"] +mongodb = ["pymongo (>=3.0)"] +redis = ["redis (>=3.0)"] +rethinkdb = ["rethinkdb (>=2.4.0)"] +sqlalchemy = ["sqlalchemy (>=1.4)"] +testing = ["pytest", "pytest-asyncio", "pytest-cov", "pytest-tornado5"] +tornado = ["tornado (>=4.3)"] +twisted = ["twisted"] +zookeeper = ["kazoo"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "isodate" +version = "0.6.1" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = "*" +files = [ + {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, + {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "jinja2" +version = "3.1.2" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "markupsafe" +version = "2.1.3" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = 
"MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = 
"MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pyaml" +version = "23.9.7" +description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyaml-23.9.7-py3-none-any.whl", hash = 
"sha256:fdb4c111b676d2381d1aa88c378fcde46c167575dfd688e656977a77075b692c"}, + {file = "pyaml-23.9.7.tar.gz", hash = "sha256:581ea4e99f0e308864407e04c03c609241aefa3a15dfba8964da7644baf3b217"}, +] + +[package.dependencies] +PyYAML = "*" + +[package.extras] +anchors = ["unidecode"] + +[[package]] +name = "pyparsing" +version = "3.1.1" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, + {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pyrdfj2" +version = "0.0.5" +description = "Python wrapper on jinja SPARQL templating" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "pyrdfj2-0.0.5-py3-none-any.whl", hash = "sha256:fa8dabb66668345d6da64a58e9cead75b02090abd3727a9577140db10b00d6ce"}, + {file = "pyrdfj2-0.0.5.tar.gz", hash = "sha256:6d840015f7d493313c7fa432a5bef924da5cb9c5d76c39bd237caa36fcc1476a"}, +] + +[package.dependencies] +jinja2 = "*" +python-dateutil = "*" +uritemplate = "*" + +[[package]] +name = "pytest" +version = "7.4.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, + {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "1.0.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, + {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "pytz" +version = "2023.3.post1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" 
+description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = 
"PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", 
hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "rdflib" +version = "7.0.0" +description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." +optional = false +python-versions = ">=3.8.1,<4.0.0" +files = [ + {file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"}, + {file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"}, +] + +[package.dependencies] +isodate = ">=0.6.0,<0.7.0" +pyparsing = ">=2.1.0,<4" + +[package.extras] +berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] +html = ["html5lib (>=1.0,<2.0)"] +lxml = ["lxml (>=4.3.0,<5.0.0)"] +networkx = ["networkx (>=2.0.0,<3.0.0)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sparqlwrapper" +version = "2.0.0" +description = "SPARQL Endpoint interface to Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SPARQLWrapper-2.0.0-py3-none-any.whl", hash = "sha256:c99a7204fff676ee28e6acef327dc1ff8451c6f7217dcd8d49e8872f324a8a20"}, + {file = "SPARQLWrapper-2.0.0.tar.gz", hash = "sha256:3fed3ebcc77617a4a74d2644b86fd88e0f32e7f7003ac7b2b334c026201731f1"}, +] + +[package.dependencies] +rdflib = ">=6.1.1" + +[package.extras] +dev = ["mypy (>=0.931)", "pandas (>=1.3.5)", "pandas-stubs (>=1.2.0.48)", "setuptools (>=3.7.1)"] +docs = ["sphinx (<5)", "sphinx-rtd-theme"] +keepalive = ["keepalive (>=0.5)"] +pandas = ["pandas (>=1.3.5)"] + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + +[[package]] +name = "tzlocal" +version = "5.2" +description = "tzinfo object for the local timezone" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, + {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, +] + +[package.dependencies] +tzdata = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] + +[[package]] +name = "uritemplate" +version = "4.1.1" +description = "Implementation of RFC 6570 URI Templates" +optional = false +python-versions = ">=3.6" +files = [ + {file = 
"uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, + {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.11" +content-hash = "05f703f9820516a62de9200fc60306b19dd0af9a35ca726b1a0236f1c804d674" diff --git a/docker/dereferencer/dereferencer-py/pyproject.toml b/docker/dereferencer/dereferencer-py/pyproject.toml new file mode 100644 index 0000000..1de2eb6 --- /dev/null +++ b/docker/dereferencer/dereferencer-py/pyproject.toml @@ -0,0 +1,23 @@ +[tool.poetry] +name = "dereferencer-py" +version = "0.1.0" +description = "Dereferencing tool that will add triples to a knowledge graph given a config containing a SPARQL query and a list of property paths are given." +authors = ["cedricdcc <30471340+cedricdcc@users.noreply.github.com>"] +license = "CC0" +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.10" +apscheduler = "^3.10.4" +pyaml = "^23.9.7" +python-dotenv = "^1.0.0" +sparqlwrapper = "^2.0.0" +pyrdfj2 = "^0.0.5" + + +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.3" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 8b3da2a..1502515 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -34,6 +34,39 @@ services: be.vliz.container.project: "LWUA" be.vliz.container.group: "services" + dereferencer: + build: + context: ./dereferencer/ + #args: + image: lwua/dereferencer + container_name: dereferencer + depends_on: + - graphdb + volumes: + - ../data:/data # Store for any input data + - ../logging:/logging # Store for any input data + env_file: + - ../.env + environment: + - GDB_BASE=http://graphdb:7200/ + - INGEST_DATA_FOLDER=/data + - CONFIG_FILE=/configs/config_dereferencer.yml + # for test / dev -- no restart and single run + restart: "no" + command: run + # towards deploy -- make restart and keep service running -- consequence: use ctrl-c to stop + # restart: unless-stopped + # command: start + links: + - graphdb + logging: + driver: json-file + options: + max-size: 10m + labels: + be.vliz.container.project: "LWUA" + be.vliz.container.group: "services" + ingest: build: context: ./lwua-ingest/ From 3fa7ada1f698a355c822b7cfacbab8191871bbbc Mon Sep 17 00:00:00 2001 From: cedricd Date: Thu, 30 Nov 2023 23:41:36 +0100 Subject: [PATCH 46/60] added dereferencing config and memory --- .github/workflows/linting-python-files.yml | 6 +- configs/dereference_test.yml | 10 ++ docker/dereferencer/Dockerfile | 2 +- .../dereferencer-py/debug-logconf.yml | 2 +- .../dereferencer/derefEntity.py | 100 +++++++++++ .../dereferencer/dereference.py | 37 +++- .../dereferencer-py/dereferencer/graph.py | 44 +++++ .../dereferencer-py/dereferencer/schedule.py | 3 + .../dereferencer/dereferencer-py/poetry.lock | 160 +++++++++++++++++- .../dereferencer-py/pyproject.toml | 3 +- docker/docker-compose.yml | 2 +- 11 files changed, 362 insertions(+), 7 deletions(-) create mode 100644 configs/dereference_test.yml create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py create mode 100644 docker/dereferencer/dereferencer-py/dereferencer/graph.py diff --git a/.github/workflows/linting-python-files.yml b/.github/workflows/linting-python-files.yml index ad41f44..1e2bbbf 100644 --- a/.github/workflows/linting-python-files.yml +++ 
@@ -4,9 +4,11 @@ on:
   push:
     paths:
       - 'docker/lwua-ingest/**/*.py'
+      - 'docker/dereferencer/**/*.py'
   pull_request:
     paths:
       - 'docker/lwua-ingest/**/*.py'
+      - 'docker/dereferencer/**/*.py'
 
 jobs:
   lint:
@@ -28,15 +30,17 @@ jobs:
       - name: Run Black
         run: |
           black docker/lwua-ingest/
+          black docker/dereferencer/
 
       - name: Run autopep8
         run: |
           autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/lwua-ingest/
+          autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/dereferencer/
 
       - name: Commit and push changes
         run: |
           git config --global user.name 'cedricdcc'
           git config --global user.email 'github-actions[bot]@users.noreply.github.com'
           git add -A
-          git commit -m "Automated code formatting" || exit 0
+          git commit -m "Automated python code formatting" || exit 0
           git push
\ No newline at end of file
diff --git a/configs/dereference_test.yml b/configs/dereference_test.yml
new file mode 100644
index 0000000..2030425
--- /dev/null
+++ b/configs/dereference_test.yml
@@ -0,0 +1,10 @@
+SPARQL: >
+  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+  PREFIX schema: <https://schema.org/>
+  SELECT ?s
+  WHERE {
+    ?s rdf:type schema:Person .
+  } LIMIT 100
+property_paths:
+
+
\ No newline at end of file
diff --git a/docker/dereferencer/Dockerfile b/docker/dereferencer/Dockerfile
index 7891672..9d95cdf 100644
--- a/docker/dereferencer/Dockerfile
+++ b/docker/dereferencer/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.10-slim as python
+FROM python:3.11-slim as python
 VOLUME /data
 VOLUME /logging
 WORKDIR /dereferencer-py
diff --git a/docker/dereferencer/dereferencer-py/debug-logconf.yml b/docker/dereferencer/dereferencer-py/debug-logconf.yml
index 1ee4e9e..295ac9b 100644
--- a/docker/dereferencer/dereferencer-py/debug-logconf.yml
+++ b/docker/dereferencer/dereferencer-py/debug-logconf.yml
@@ -24,7 +24,7 @@ loggers:
   tests:
     level: DEBUG
     propagate: yes
-  lwua:
+  dereferencer:
     level: DEBUG
     propagate: yes
   apscheduler:
diff --git a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
new file mode 100644
index 0000000..cae3a9e
--- /dev/null
+++ b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
@@ -0,0 +1,100 @@
+# This file will contain all the classes and functions needed to complete the dereferencing process
+
+import logging
+from pathlib import Path
+import json
+import requests
+import os
+
+from .helpers import resolve_path
+
+log = logging.getLogger(__name__)
+
+def metadata_folder_from_config():
+    local_default = str(resolve_path("./memory_dereferencer", versus="dotenv"))
+    folder_name = os.getenv("METADATA_FILES_FOLDER", local_default)
+    return folder_name
+
+METADATA_FOLDER = metadata_folder_from_config()
+
+def check_metadata_folder_exists():
+    metadata_folder = metadata_folder_from_config()
+    if not os.path.exists(metadata_folder):
+        log.info(f"metadata folder {metadata_folder} does not exist, creating it")
+        os.makedirs(metadata_folder)
+
+METADATA_FILE = METADATA_FOLDER + "/metadata.json"
+
+def make_metadata(input_list):
+    result = []
+
+    if input_list is None:
+        return result
+
+    for item in input_list:
+        if isinstance(item, dict):
+            for key, values in item.items():
+                if isinstance(values, list):
+                    children = make_metadata(values)
+                    result.append({key: {"completed": False, "children": children}})
+                else:
+                    result.append({values: {"completed": False}})
+        elif isinstance(item, str):
+            result.append({item: {"completed": False}})
+
+    return result
+
+def save_metadata(uri:str,data:object,completed:bool=False):
+    """saves the metadata to file
+    """
+    with open(METADATA_FILE, 'r') as f:
+        # read the json file
+        current_metadata = json.load(f)
+        # replace the metadata for the uri with the new metadata
+        current_metadata[uri] = {"completed":completed, "metadata": data}
+    with open(METADATA_FILE, 'w') as f:
+        json.dump(current_metadata, f)
+
+class DerefUriEntity:
+    def __init__(self, uri:str, propertypaths:dict):
+        self.uri = uri
+        self.propertypathmetadata = None
+        self.propertypaths = propertypaths
+
+        check_metadata_folder_exists()
+        if not os.path.exists(METADATA_FILE):
+            log.info(f"metadata file {METADATA_FILE} does not exist, creating it")
+            with open(METADATA_FILE, 'w') as f:
+                json.dump({}, f)
+        else:
+            with open(METADATA_FILE, 'r') as f:
+                self.metadata = json.load(f)
+            self.propertypathmetadata = self.get_metadata_uri(uri)
+
+        log.debug(f"propertypathmetadata: {self.propertypathmetadata}")
+
+        #if propertyMetadata is None then run function make metadata
+        if self.propertypathmetadata is None:
+            self.propertypathmetadata = make_metadata(self.propertypaths)
+            log.info(f"propertypathmetadata: {self.propertypathmetadata}")
+            save_metadata(self.uri, self.propertypathmetadata)
+
+    def get_metadata_uri(self, uri:str):
+        """checks if a given uri is in the metadata
+
+        :param uri: uri to check
+        :type uri: str
+        :return: dict with the metadata for the uri
+        """
+        if uri in self.metadata:
+            return self.metadata[uri]["metadata"]
+        else:
+            return None
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docker/dereferencer/dereferencer-py/dereferencer/dereference.py b/docker/dereferencer/dereferencer-py/dereferencer/dereference.py
index 2d3d444..94be083 100644
--- a/docker/dereferencer/dereferencer-py/dereferencer/dereference.py
+++ b/docker/dereferencer/dereferencer-py/dereferencer/dereference.py
@@ -4,14 +4,49 @@ from datetime import datetime
 from SPARQLWrapper import SPARQLWrapper, JSON
 import os
+import yaml
 from pathlib import Path
 
 from .helpers import enable_logging, resolve_path
+from .graph import uri_list
+from .derefEntity import DerefUriEntity
 
 log = logging.getLogger(__name__)
 
+def config_path_from_config():
+    local_default = str(resolve_path("../configs", versus="dotenv"))
+    folder_name = os.getenv("CONFIG_FILES_FOLDER", local_default)
+    return Path(folder_name).absolute()
+
 class Dereference:
     def __init__(self):
         pass
 
     def run_dereference(self):
-        log.info("running dereference")
\ No newline at end of file
+        log.info("running dereference")
+
+        config_folder_path = config_path_from_config()
+        log.info(f"run_dereference on config files in {config_folder_path}")
+        # get all the config files in the config folder
+        # the files should be in yml or yaml format and should start with dereference
+        config_files = [f for f in config_folder_path.glob("dereference*.yml")]
+        log.info(f"config files found: {config_files}")
+
+        # for each config file , parse the file and get the config
+        for config_file in config_files:
+            log.info(f"config file: {config_file}")
+            with open(config_file, 'r') as stream:
+                try:
+                    config = yaml.safe_load(stream)
+                    log.info(f"config: {config}")
+
+                    sparql_query = config["SPARQL"]
+                    uri_list_from_query = uri_list(sparql_query)
+
+                    # make a derefEntity for each uri in the uri_list_from_query
+                    for uri in uri_list_from_query:
+                        log.info(f"uri: {uri}")
+                        derefEntity = DerefUriEntity(uri, config["property_paths"])
+                        log.info(f"derefEntity: {derefEntity}")
+
+                except yaml.YAMLError as exc:
+                    log.error(exc)
\ No newline at 
end of file diff --git a/docker/dereferencer/dereferencer-py/dereferencer/graph.py b/docker/dereferencer/dereferencer-py/dereferencer/graph.py new file mode 100644 index 0000000..00fa9ae --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/graph.py @@ -0,0 +1,44 @@ +# this file will contain the functions that will be used to query the graph database + +import logging +from SPARQLWrapper import SPARQLWrapper, JSON +import logging +import os + +# from dotenv import load_dotenv + +log = logging.getLogger(__name__) + +def gdb_from_config(): + base = os.getenv("GDB_BASE", "http://localhost:7200") + repoid = os.getenv("GDB_REPO", "lwua23") + + endpoint = f"{ base }/repositories/{ repoid }" + # update statements are handled at other endpoint + updateEndpoint = endpoint + "/statements" + + log.debug(f"using endpoint {endpoint}") + + GDB = SPARQLWrapper( + endpoint=endpoint, + updateEndpoint=updateEndpoint, + returnFormat=JSON, + agent="lwua-python-sparql-client", + ) + GDB.method = "POST" + return GDB + + +GDB = gdb_from_config() + +def uri_list(query): + """ + Return a list of URI's from a query + """ + log.debug(f"uri_list: {query}") + GDB.setQuery(query) + GDB.setReturnFormat(JSON) + results = GDB.query().convert() + log.debug(f"uri_list: results: {results}") + return [result["s"]["value"] for result in results["results"]["bindings"]] + diff --git a/docker/dereferencer/dereferencer-py/dereferencer/schedule.py b/docker/dereferencer/dereferencer-py/dereferencer/schedule.py index 5d4d2c4..951a4ee 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/schedule.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/schedule.py @@ -26,12 +26,15 @@ def __init__(self, run_on_start: bool = True): def start(self): try: + log.info("starting dereferencer scheduler") self.dereferencer = Dereference() if self._run_on_start: self.main_schedule() super().start() except (KeyboardInterrupt, SystemExit): log.info("execution interrupted") + except Exception as e: + log.exception(e) def main_schedule(self): log.info("starting main service flow") diff --git a/docker/dereferencer/dereferencer-py/poetry.lock b/docker/dereferencer/dereferencer-py/poetry.lock index 2397eb1..c6fde16 100644 --- a/docker/dereferencer/dereferencer-py/poetry.lock +++ b/docker/dereferencer/dereferencer-py/poetry.lock @@ -28,6 +28,116 @@ tornado = ["tornado (>=4.3)"] twisted = ["twisted"] zookeeper = ["kazoo"] +[[package]] +name = "certifi" +version = "2023.11.17" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, + {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -39,6 +149,17 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "idna" +version = "3.6" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -362,6 +483,27 @@ html = ["html5lib (>=1.0,<2.0)"] lxml = ["lxml (>=4.3.0,<5.0.0)"] networkx = ["networkx (>=2.0.0,<3.0.0)"] +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + [[package]] name = "six" version = "1.16.0" @@ -432,7 +574,23 @@ files = [ {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, ] +[[package]] +name = "urllib3" +version = "2.1.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"},
+    {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"},
+]
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
+zstd = ["zstandard (>=0.18.0)"]
+
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "05f703f9820516a62de9200fc60306b19dd0af9a35ca726b1a0236f1c804d674"
+content-hash = "2dca960013881b31821cd303cd54f00e36cdb86e49bed187c5acfab72556900a"
diff --git a/docker/dereferencer/dereferencer-py/pyproject.toml b/docker/dereferencer/dereferencer-py/pyproject.toml
index 1de2eb6..1e042db 100644
--- a/docker/dereferencer/dereferencer-py/pyproject.toml
+++ b/docker/dereferencer/dereferencer-py/pyproject.toml
@@ -7,12 +7,13 @@ license = "CC0"
 readme = "README.md"
 
 [tool.poetry.dependencies]
-python = "^3.10"
+python = "^3.11"
 apscheduler = "^3.10.4"
 pyaml = "^23.9.7"
 python-dotenv = "^1.0.0"
 sparqlwrapper = "^2.0.0"
 pyrdfj2 = "^0.0.5"
+requests = "^2.31.0"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 1502515..95e3743 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -44,13 +44,13 @@
       - graphdb
     volumes:
       - ../data:/data # Store for any input data
+      - ../configs:/configs # Store for any config files
      - ../logging:/logging # Store for any log output
     env_file:
       - ../.env
     environment:
       - GDB_BASE=http://graphdb:7200/
       - INGEST_DATA_FOLDER=/data
-      - CONFIG_FILE=/configs/config_dereferencer.yml
     # for test / dev -- no restart and single run
     restart: "no"
     command: run

From ccc7e87c2eae2f13f191ab7c87cb07d17d7190ee Mon Sep 17 00:00:00 2001
From: cedricdcc
Date: Thu, 30 Nov 2023 22:42:02 +0000
Subject: [PATCH 47/60] Automated python code formatting

---
 .../dereferencer-py/dereferencer/__init__.py |  2 +-
 .../dereferencer/derefEntity.py              | 58 ++++++++++---------
 .../dereferencer/dereference.py              | 27 +++++----
 .../dereferencer-py/dereferencer/graph.py    |  6 +-
 4 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/docker/dereferencer/dereferencer-py/dereferencer/__init__.py b/docker/dereferencer/dereferencer-py/dereferencer/__init__.py
index 5f11648..f9d322f 100644
--- a/docker/dereferencer/dereferencer-py/dereferencer/__init__.py
+++ b/docker/dereferencer/dereferencer-py/dereferencer/__init__.py
@@ -1,4 +1,4 @@
 __all__ = ["DereferenceScheduler"]
 
 # Add scheduler path here
-from .schedule import DereferenceScheduler
\ No newline at end of file
+from .schedule import DereferenceScheduler
diff --git a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
index cae3a9e..9c1abcb 100644
--- a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
+++ b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
@@ -1,4 +1,5 @@
-# This file will contain all the classes and functions needed to complete the dereferencing process
+# This file will contain all the classes and functions needed to complete
+# the dereferencing process
 
 import logging
 from pathlib import Path
@@ -10,33 +11,40 @@
 
 log = logging.getLogger(__name__)
 
+
 def metadata_folder_from_config():
     local_default = str(resolve_path("./memory_dereferencer", versus="dotenv"))
     folder_name = os.getenv("METADATA_FILES_FOLDER", local_default)
     return 
folder_name + METADATA_FOLDER = metadata_folder_from_config() + def check_metadata_folder_exists(): metadata_folder = metadata_folder_from_config() if not os.path.exists(metadata_folder): - log.info(f"metadata folder {metadata_folder} does not exist, creating it") + log.info( + f"metadata folder {metadata_folder} does not exist, creating it") os.makedirs(metadata_folder) + METADATA_FILE = METADATA_FOLDER + "/metadata.json" + def make_metadata(input_list): result = [] if input_list is None: return result - + for item in input_list: if isinstance(item, dict): for key, values in item.items(): if isinstance(values, list): children = make_metadata(values) - result.append({key: {"completed": False, "children": children}}) + result.append( + {key: {"completed": False, "children": children}}) else: result.append({values: {"completed": False}}) elif isinstance(item, str): @@ -44,42 +52,44 @@ def make_metadata(input_list): return result -def save_metadata(uri:str,data:object,completed:bool=False): - """saves the metadata to file - """ - with open(METADATA_FILE, 'r') as f: + +def save_metadata(uri: str, data: object, completed: bool = False): + """saves the metadata to file""" + with open(METADATA_FILE, "r") as f: # read the json file current_metadata = json.load(f) # replace the metadata for the uri with the new metadata - current_metadata[uri] = {"completed":completed, "metadata": data} - with open(METADATA_FILE, 'w') as f: + current_metadata[uri] = {"completed": completed, "metadata": data} + with open(METADATA_FILE, "w") as f: json.dump(current_metadata, f) + class DerefUriEntity: - def __init__(self, uri:str, propertypaths:dict): + def __init__(self, uri: str, propertypaths: dict): self.uri = uri self.propertypathmetadata = None self.propertypaths = propertypaths - + check_metadata_folder_exists() if not os.path.exists(METADATA_FILE): - log.info(f"metadata file {METADATA_FILE} does not exist, creating it") - with open(METADATA_FILE, 'w') as f: + log.info( + f"metadata file {METADATA_FILE} does not exist, creating it") + with open(METADATA_FILE, "w") as f: json.dump({}, f) else: - with open(METADATA_FILE, 'r') as f: + with open(METADATA_FILE, "r") as f: self.metadata = json.load(f) self.propertypathmetadata = self.get_metadata_uri(uri) - + log.debug(f"propertypathmetadata: {self.propertypathmetadata}") - - #if propertyMetadata is None then run function make metadata + + # if propertyMetadata is None then run function make metadata if self.propertypathmetadata is None: self.propertypathmetadata = make_metadata(self.propertypaths) log.info(f"propertypathmetadata: {self.propertypathmetadata}") save_metadata(self.uri, self.propertypathmetadata) - - def get_metadata_uri(self, uri:str): + + def get_metadata_uri(self, uri: str): """checks if a given uri is in the metadata :param uri: uri to check @@ -90,11 +100,3 @@ def get_metadata_uri(self, uri:str): return self.metadata[uri]["metadata"] else: return None - - - - - - - - \ No newline at end of file diff --git a/docker/dereferencer/dereferencer-py/dereferencer/dereference.py b/docker/dereferencer/dereferencer-py/dereferencer/dereference.py index 94be083..c7afeac 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/dereference.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/dereference.py @@ -12,41 +12,46 @@ log = logging.getLogger(__name__) + def config_path_from_config(): local_default = str(resolve_path("../configs", versus="dotenv")) folder_name = os.getenv("CONFIG_FILES_FOLDER", local_default) return Path(folder_name).absolute() 
+ class Dereference: def __init__(self): pass def run_dereference(self): log.info("running dereference") - + config_folder_path = config_path_from_config() log.info(f"run_dereference on config files in {config_folder_path}") - # get all the config files in the config folder - # the files should be in yml or yaml format and should start with dereference + # get all the config files in the config folder + # the files should be in yml or yaml format and should start with + # dereference config_files = [f for f in config_folder_path.glob("dereference*.yml")] log.info(f"config files found: {config_files}") - + # for each config file , parse the file and get the config for config_file in config_files: log.info(f"config file: {config_file}") - with open(config_file, 'r') as stream: + with open(config_file, "r") as stream: try: config = yaml.safe_load(stream) log.info(f"config: {config}") - + sparql_query = config["SPARQL"] uri_list_from_query = uri_list(sparql_query) - - # make a derefEntity for each uri in the uri_list_from_query + + # make a derefEntity for each uri in the + # uri_list_from_query for uri in uri_list_from_query: log.info(f"uri: {uri}") - derefEntity = DerefUriEntity(uri, config["property_paths"]) + derefEntity = DerefUriEntity( + uri, config["property_paths"]) log.info(f"derefEntity: {derefEntity}") - + except yaml.YAMLError as exc: - log.error(exc) \ No newline at end of file + log.error(exc) diff --git a/docker/dereferencer/dereferencer-py/dereferencer/graph.py b/docker/dereferencer/dereferencer-py/dereferencer/graph.py index 00fa9ae..ba4cf51 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/graph.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/graph.py @@ -1,4 +1,5 @@ -# this file will contain the functions that will be used to query the graph database +# this file will contain the functions that will be used to query the +# graph database import logging from SPARQLWrapper import SPARQLWrapper, JSON @@ -9,6 +10,7 @@ log = logging.getLogger(__name__) + def gdb_from_config(): base = os.getenv("GDB_BASE", "http://localhost:7200") repoid = os.getenv("GDB_REPO", "lwua23") @@ -31,6 +33,7 @@ def gdb_from_config(): GDB = gdb_from_config() + def uri_list(query): """ Return a list of URI's from a query @@ -41,4 +44,3 @@ def uri_list(query): results = GDB.query().convert() log.debug(f"uri_list: results: {results}") return [result["s"]["value"] for result in results["results"]["bindings"]] - From 04803d57a4432ef3eaa277018385e5ab1553d3da Mon Sep 17 00:00:00 2001 From: cedricd Date: Fri, 1 Dec 2023 12:29:10 +0100 Subject: [PATCH 48/60] small updates lwua-ingest and added deref entity runs for orcid and mr --- configs/dereference_mr_test.yml | 11 +++ configs/dereference_test.yml | 6 +- configs/dereference_wrongSPARQL.yml | 8 ++ data/mr_regions_ldes_test.ttl | 89 +++++++++++++++++++ .../dereferencer/derefEntity.py | 54 +++++++++-- .../dereferencer-py/dereferencer/graph.py | 16 +++- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 6 +- docker/lwua-ingest/lwua-py/lwua/watcher.py | 6 +- docker/lwua-ingest/lwua-py/tests/test.ttl | 28 ++++++ .../lwua-ingest/lwua-py/tests/test_graphdb.py | 13 +-- .../lwua-ingest/lwua-py/tests/test_queries.py | 2 +- dotenv-example | 2 +- 12 files changed, 216 insertions(+), 25 deletions(-) create mode 100644 configs/dereference_mr_test.yml create mode 100644 configs/dereference_wrongSPARQL.yml create mode 100644 data/mr_regions_ldes_test.ttl create mode 100644 docker/lwua-ingest/lwua-py/tests/test.ttl diff --git a/configs/dereference_mr_test.yml 
b/configs/dereference_mr_test.yml
new file mode 100644
index 0000000..e454530
--- /dev/null
+++ b/configs/dereference_mr_test.yml
@@ -0,0 +1,11 @@
+SPARQL: >
+  PREFIX dc: <http://purl.org/dc/terms/>
+  SELECT ?o
+  WHERE {
+    ?s dc:isVersionOf ?o .
+    FILTER regex(str(?o), "marineregions.org")
+  }
+property_paths:
+  - http://marineregions.org/ns/ontology#hasGeometry
+  - http://marineregions.org/ns/ontology#isPartOf:
+    - http://marineregions.org/ns/ontology#hasGeometry
diff --git a/configs/dereference_test.yml b/configs/dereference_test.yml
index 2030425..2c86d30 100644
--- a/configs/dereference_test.yml
+++ b/configs/dereference_test.yml
@@ -4,7 +4,5 @@ SPARQL: >
   SELECT ?s
   WHERE {
     ?s rdf:type schema:Person .
-  } LIMIT 100
-property_paths:
-
-
\ No newline at end of file
+  }
+property_paths:
\ No newline at end of file
diff --git a/configs/dereference_wrongSPARQL.yml b/configs/dereference_wrongSPARQL.yml
new file mode 100644
index 0000000..903d9d1
--- /dev/null
+++ b/configs/dereference_wrongSPARQL.yml
@@ -0,0 +1,8 @@
+SPARQL: >
+  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+  PREFIX schema: <https://schema.org/>
+  SELECT ?s ?o
+  WHERE {
+    ?s rdf:type ?o .
+  }
+property_paths:
\ No newline at end of file
diff --git a/data/mr_regions_ldes_test.ttl b/data/mr_regions_ldes_test.ttl
new file mode 100644
index 0000000..4a9da1d
--- /dev/null
+++ b/data/mr_regions_ldes_test.ttl
@@ -0,0 +1,89 @@
+@prefix tree: <https://w3id.org/tree#> .
+@prefix ldes: <https://w3id.org/ldes#> .
+@prefix dc: <http://purl.org/dc/terms/> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix gsp: <http://www.opengis.net/ont/geosparql#> .
+@prefix dcat: <http://www.w3.org/ns/dcat#> .
+@prefix mr: <http://marineregions.org/ns/ontology#> .
+@prefix schema: <https://schema.org/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+
+
+  a tree:Node ;
+  tree:relation [ tree:node  ] ;
+  ldes:retentionPolicy [
+    a ldes:LatestVersionSubset ;
+    ldes:amount 1 ;
+    ldes:versionKey ( dc:isVersionOf )
+  ] .
+
+
+  a ldes:EventStream ;
+  tree:shape [
+    a sh:NodeShape ;
+    sh:nodeKind sh:IRI ;
+    sh:property [
+      sh:datatype xsd:dateTime ;
+      sh:minCount 1 ;
+      sh:path dc:modified
+    ], [
+      sh:minCount 1 ;
+      sh:nodeKind sh:IRI ;
+      sh:path dc:isVersionOf
+    ], [ sh:path skos:note ], [ sh:path skos:historyNote ], [
+      sh:datatype gsp:wktLiteral ;
+      sh:maxCount 1 ;
+      sh:minCount 1 ;
+      sh:path dcat:centroid
+    ], [
+      sh:datatype gsp:wktLiteral ;
+      sh:maxCount 1 ;
+      sh:minCount 0 ;
+      sh:path dcat:bbox
+    ], [
+      sh:minCount 0 ;
+      sh:nodeKind sh:IRI ;
+      sh:path mr:hasGeometry
+    ], [
+      sh:minCount 0 ;
+      sh:node [
+        a sh:NodeShape ;
+        sh:nodeKind sh:IRI ;
+        sh:property [
+          sh:class schema:PropertyValue ;
+          sh:maxCount 1 ;
+          sh:minCount 1 ;
+          sh:path schema:identifier
+        ], [
+          sh:maxCount 1 ;
+          sh:minCount 1 ;
+          sh:nodeKind sh:IRI ;
+          sh:path schema:url
+        ]
+      ] ;
+      sh:path skos:exactMatch
+    ], [
+      sh:datatype rdf:langString ;
+      sh:minCount 1 ;
+      sh:path skos:prefLabel
+    ], [
+      sh:datatype rdf:langString ;
+      sh:minCount 0 ;
+      sh:path skos:altLabel
+    ], [
+      sh:class mr:MRGeoObject ;
+      sh:minCount 0 ;
+      sh:nodeKind sh:IRI ;
+      sh:path mr:isRelatedTo
+    ] ;
+    sh:targetClass mr:MRGeoObject
+  ] ;
+  tree:view  ;
+  tree:member  .
+
+
+  dc:isVersionOf  ;
+  dc:modified "2023-11-28T10:02:22Z"^^xsd:dateTime .
+
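A note on the configs above: the nested property_paths list in dereference_mr_test.yml is exactly the shape that derefEntity.make_metadata (added in PATCH 46) expands into the per-URI bookkeeping persisted to metadata.json. A minimal sketch of that expansion -- assuming the dereferencer package imports cleanly outside the container (its import only resolves default folders) and that yaml.safe_load reads the trailing-colon entry as a one-key mapping:

# Sketch only -- not part of any patch in this series.
from dereferencer.derefEntity import make_metadata

# what yaml.safe_load produces for the property_paths of dereference_mr_test.yml
property_paths = [
    "http://marineregions.org/ns/ontology#hasGeometry",
    {"http://marineregions.org/ns/ontology#isPartOf": [
        "http://marineregions.org/ns/ontology#hasGeometry",
    ]},
]

# every path starts out not-yet-dereferenced; nested paths become "children"
expected = [
    {"http://marineregions.org/ns/ontology#hasGeometry": {"completed": False}},
    {"http://marineregions.org/ns/ontology#isPartOf": {
        "completed": False,
        "children": [
            {"http://marineregions.org/ns/ontology#hasGeometry": {"completed": False}},
        ],
    }},
]

assert make_metadata(property_paths) == expected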
diff --git a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
index 9c1abcb..6e0a6c8 100644
--- a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
+++ b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py
@@ -17,8 +17,10 @@ def metadata_folder_from_config():
     folder_name = os.getenv("METADATA_FILES_FOLDER", local_default)
     return folder_name
 
-
-METADATA_FOLDER = metadata_folder_from_config()
+def data_folder_from_config():
+    local_default = str(resolve_path("../data", versus="dotenv"))
+    folder_name = os.getenv("DATA_FOLDER", local_default)
+    return folder_name
 
 
 def check_metadata_folder_exists():
@@ -27,8 +29,9 @@ def check_metadata_folder_exists():
         log.info(
             f"metadata folder {metadata_folder} does not exist, creating it")
         os.makedirs(metadata_folder)
-
-
+
+DATA_FOLDER = data_folder_from_config()
+METADATA_FOLDER = metadata_folder_from_config()
 METADATA_FILE = METADATA_FOLDER + "/metadata.json"
@@ -88,8 +91,10 @@ def __init__(self, uri: str, propertypaths: dict):
             self.propertypathmetadata = make_metadata(self.propertypaths)
             log.info(f"propertypathmetadata: {self.propertypathmetadata}")
             save_metadata(self.uri, self.propertypathmetadata)
-
-    def get_metadata_uri(self, uri: str):
+            self.download_uri(uri)
+            self.run_download_propertypaths()
+
+    def get_metadata_uri(self, uri:str):
         """checks if a given uri is in the metadata
 
         :param uri: uri to check
@@ -100,3 +105,40 @@ def get_metadata_uri(self, uri: str):
             return self.metadata[uri]["metadata"]
         else:
             return None
+
+    def download_uri(self, uri:str):
+        """downloads the uri either in json-ld or ttl format and puts the result in the DATA_FOLDER
+
+        :param uri: uri to download
+        :type uri: str
+        """
+        log.info(f"downloading uri {uri}")
+        # perform request with accept header for json-ld or ttl
+        headers = {"Accept": "application/ld+json, text/turtle"}
+        r = requests.get(uri, headers=headers)
+
+        # check if the request was successful and it returned a json-ld or ttl file
+        if r.status_code == 200 and ("application/ld+json" in r.headers["Content-Type"] or "text/turtle" in r.headers["Content-Type"]):
+            # write the file to disk
+            # TODO: check if the file already exists
+            # check if the file is json-ld or ttl and add the correct extension
+            if "application/ld+json" in r.headers["Content-Type"]:
+                filename = DATA_FOLDER + "/" + uri.split("/")[-1] + ".json"
+            elif "text/turtle" in r.headers["Content-Type"]:
+                filename = DATA_FOLDER + "/" + uri.split("/")[-1] + ".ttl"
+            with open(filename, "w") as f:
+                f.write(r.text)
+            log.info(f"file saved to {filename}")
+            return filename
+        else:
+            log.warning(f"request for {uri} failed with status code {r.status_code} and content type {r.headers['Content-Type']}")
+            return None
+
+
+    # This function cannot be run atm , first download main self.uri
+    def run_download_propertypaths(self):
+        """runs the download_propertypaths function for all propertypaths
+        """
+        for propertypath in self.propertypathmetadata:
+            log.info(f"running download_propertypath for {propertypath}")
+
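The download_uri method added above leans on plain HTTP content negotiation. A stand-alone sketch of that round-trip, kept deliberately close to the method's logic (the URI below is only an illustrative dereferenceable identifier, not one taken from the test data):

# Sketch only -- not part of any patch in this series.
import requests

uri = "http://marineregions.org/mrgid/3293"  # hypothetical example URI
resp = requests.get(uri, headers={"Accept": "application/ld+json, text/turtle"})

# mirror download_uri: trust the Content-Type the server actually returned,
# and derive the local file suffix from it
ctype = resp.headers.get("Content-Type", "")
if resp.status_code == 200 and "application/ld+json" in ctype:
    suffix = ".json"
elif resp.status_code == 200 and "text/turtle" in ctype:
    suffix = ".ttl"
else:
    suffix = None  # download_uri logs a warning and returns None in this case

print(uri.split("/")[-1], "->", suffix)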
uri_list(query): Return a list of URI's from a query """ log.debug(f"uri_list: {query}") + + # Extract the variable from the SELECT clause + select_part = re.search('SELECT(.*)WHERE', query, re.IGNORECASE).group(1) + variables = select_part.split() + + # Check that there is exactly one variable in the SELECT part of the SPARQL query + if len(variables) != 1: + log.error("There should be exactly one variable in the SELECT part of the SPARQL query") + raise AssertionError("There should be exactly one variable in the SELECT part of the SPARQL query") + GDB.setQuery(query) GDB.setReturnFormat(JSON) results = GDB.query().convert() log.debug(f"uri_list: results: {results}") - return [result["s"]["value"] for result in results["results"]["bindings"]] + + # Use the extracted variable when getting the results + return [result[var]["value"] for result in results["results"]["bindings"]] + diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index 07615f0..f8d4583 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -46,10 +46,9 @@ def get_j2rdf_builder(): template_folder = resolve_path("./lwua/templates") log.info(f"template_folder == {template_folder}") # init J2RDFSyntaxBuilder - context = f"{URN_BASE}:ADMIN" j2rdf = J2RDFSyntaxBuilder( templates_folder=template_folder, - extra_functions={"registry_of_lastmod_context": context}, + extra_functions={"registry_of_lastmod_context": f"{URN_BASE}:ADMIN"}, ) return j2rdf @@ -202,9 +201,8 @@ def fname_2_context(fname: str): :return: The context corresponding to the filename. :rtype: str """ - base = os.getenv("URN_BASE", "urn:lwua:INGEST") fname = str(fname) - return f"{base}:{quote(fname)}" + return f"{URN_BASE}:{quote(fname)}" def get_registry_of_lastmod(): diff --git a/docker/lwua-ingest/lwua-py/lwua/watcher.py b/docker/lwua-ingest/lwua-py/lwua/watcher.py index 18eb08a..7bb3c1a 100644 --- a/docker/lwua-ingest/lwua-py/lwua/watcher.py +++ b/docker/lwua-ingest/lwua-py/lwua/watcher.py @@ -56,9 +56,9 @@ def report_changes(self, observer, known_lastmod_by_fname: dict = {}): if __name__ == "__main__": load_dotenv() enable_logging() - file_to_watch = resolve_path( + path_to_watch = resolve_path( os.getenv("GDB_DATA_FOLDER", "/root/graphdb-import/data"), "dotenv" ).absolute() - log.info(f"env pointing to { file_to_watch }") - w = Watcher(file_to_watch) + log.info(f"env pointing to { path_to_watch }") + w = Watcher(path_to_watch) w.run() diff --git a/docker/lwua-ingest/lwua-py/tests/test.ttl b/docker/lwua-ingest/lwua-py/tests/test.ttl new file mode 100644 index 0000000..7c862d0 --- /dev/null +++ b/docker/lwua-ingest/lwua-py/tests/test.ttl @@ -0,0 +1,28 @@ +@prefix schema: . +@prefix xsd: . +@prefix cc: . + + +#todo root this project somewhere decent -- maybe zenodo-github-doi ? + a schema:Project ; + schema:name "Lifewatch User Analysis 2023"^^xsd:string ; + schema:description "This project performs the 2023 Lifewatch User Analysis Reporting."^^xsd:string ; + schema:license ; + schema:accountablePerson ; + schema:contributor ; + schema:creator . + + a schema:Person ; + schema:name "Laurian van Maldeghem"^^xsd:string . + + a schema:Person ; + schema:name "Marque Portier"^^xsd:string . + + a schema:Organization ; + schema:name "VLIZ vzw"^^xsd:string . + + a cc:License ; + cc:legalcode ; + cc:permits cc:Reproduction, cc:Distribution, cc:DerivativeWorks ; + cc:requires cc:Notice ; + cc:prohibits cc:CCCommercialUse . 
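The fname_2_context change above is one half of a reversible mapping between ingest filenames and GraphDB named-graph contexts. A small standalone round-trip illustration of that scheme, assuming the default URN_BASE of urn:lwua:INGEST (it mirrors fname_2_context/context_2_fname rather than importing them):

from urllib.parse import quote, unquote

URN_BASE = "urn:lwua:INGEST"  # default of os.getenv("URN_BASE", ...)

def fname_2_context(fname: str) -> str:
    # percent-encode the filename so the context stays a valid IRI
    return f"{URN_BASE}:{quote(str(fname))}"

def context_2_fname(context: str) -> str:
    assert context.startswith(URN_BASE), f"Context {context} is not IRI compliant"
    return unquote(context[len(URN_BASE) + 1:])

ctx = fname_2_context("data/project.ttl")
print(ctx)  # urn:lwua:INGEST:data/project.ttl
assert context_2_fname(ctx) == "data/project.ttl"

Note that quote() keeps "/" unescaped by default, so plain relative paths survive the round trip unchanged.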
\ No newline at end of file diff --git a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py index 964e014..ef455db 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py +++ b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py @@ -6,8 +6,9 @@ suffix_2_format, read_graph, convert_results_registry_of_lastmod, -) # replace 'your_module_path' with the actual module path +) from lwua.ingest import data_path_from_config +import os results = { "head": {"vars": ["graph", "lastmod"]}, @@ -33,13 +34,13 @@ def test_context_2_fname(): def get_registry_of_lastmod(results): # Act converted = convert_results_registry_of_lastmod(results) + test_path = Path("test_file.txt") + test_date = datetime.fromisoformat("2022-01-01T00:00:00") # Assert assert isinstance(converted, dict) assert len(converted) == 1 - assert converted[Path("test_file.txt")] == datetime.fromisoformat( - "2022-01-01T00:00:00" - ) + assert converted[test_path] == test_date def test_suffix_2_format(): @@ -55,7 +56,9 @@ def test_suffix_2_format(): def test_read_graph(): # Arrange - fpath = data_path_from_config() / "project.ttl" # replace with a test file path + test_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test.ttl") + + fpath = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test.ttl") format = "turtle" # Act diff --git a/docker/lwua-ingest/lwua-py/tests/test_queries.py b/docker/lwua-ingest/lwua-py/tests/test_queries.py index fc6efe2..2a7b12a 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_queries.py +++ b/docker/lwua-ingest/lwua-py/tests/test_queries.py @@ -3,8 +3,8 @@ from pyrdfj2 import J2RDFSyntaxBuilder from lwua.helpers import resolve_path import os +from .graphdb import URN_BASE -URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") def get_j2rdf_builder(): diff --git a/dotenv-example b/dotenv-example index f1d979e..a32b984 100644 --- a/dotenv-example +++ b/dotenv-example @@ -5,5 +5,5 @@ LOGCONF=debug-logconf.yml GDB_REPO="lwua23" # scheduler settings -SCHEDULER_PERIOD=5 # seconds +SCHEDULER_PERIOD=5 # seconds | time between scheduler runs SCHEDULER_WAIT=5 # seconds | this is needed because the graphdb container takes some time to start, else the first run will be an error From b4fcec9e0a7030bea54417b6d4b6c2e16fe73e21 Mon Sep 17 00:00:00 2001 From: cedricdcc Date: Fri, 1 Dec 2023 11:29:38 +0000 Subject: [PATCH 49/60] Automated python code formatting --- .../dereferencer/derefEntity.py | 35 +++++++++++-------- .../dereferencer-py/dereferencer/graph.py | 16 +++++---- .../lwua-ingest/lwua-py/tests/test_graphdb.py | 13 ++++--- .../lwua-ingest/lwua-py/tests/test_queries.py | 1 - 4 files changed, 39 insertions(+), 26 deletions(-) diff --git a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py index 6e0a6c8..202d247 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py @@ -17,6 +17,7 @@ def metadata_folder_from_config(): folder_name = os.getenv("METADATA_FILES_FOLDER", local_default) return folder_name + def data_folder_from_config(): local_default = str(resolve_path("../data", versus="dotenv")) folder_name = os.getenv("DATA_FOLDER", local_default) return folder_name @@ -29,7 +30,8 @@ def check_metadata_folder_exists(): log.info( f"metadata folder {metadata_folder} does not exist, creating it") os.makedirs(metadata_folder) - + + DATA_FOLDER = data_folder_from_config() METADATA_FOLDER = 
metadata_folder_from_config() METADATA_FILE = METADATA_FOLDER + "/metadata.json" @@ -93,8 +95,8 @@ def __init__(self, uri: str, propertypaths: dict): save_metadata(self.uri, self.propertypathmetadata) self.download_uri(uri) self.run_download_propertypaths() - - def get_metadata_uri(self, uri:str): + + def get_metadata_uri(self, uri: str): """checks if a given uri is in the metadata :param uri: uri to check @@ -105,10 +107,10 @@ def get_metadata_uri(self, uri:str): return self.metadata[uri]["metadata"] else: return None - - def download_uri(self, uri:str): + + def download_uri(self, uri: str): """downloads the uri either in json-ld or ttl format and puts the result in the DATA_FOLDER - + :param uri: uri to download :type uri: str """ @@ -116,9 +118,13 @@ def download_uri(self, uri:str): # perform request with accept header for json-ld or ttl headers = {"Accept": "application/ld+json, text/turtle"} r = requests.get(uri, headers=headers) - - # check if the request was successful and it returned a json-ld or ttl file - if r.status_code == 200 and ("application/ld+json" in r.headers["Content-Type"] or "text/turtle" in r.headers["Content-Type"]): + + # check if the request was successful and it returned a json-ld or ttl + # file + if r.status_code == 200 and ( + "application/ld+json" in r.headers["Content-Type"] + or "text/turtle" in r.headers["Content-Type"] + ): # write the file to disk # TODO: check if the file already exists # check if the file is json-ld or ttl and add the correct extension @@ -131,14 +137,13 @@ def download_uri(self, uri:str): log.info(f"file saved to {filename}") return filename else: - log.warning(f"request for {uri} failed with status code {r.status_code} and content type {r.headers['Content-Type']}") + log.warning( + f"request for {uri} failed with status code {r.status_code} and content type {r.headers['Content-Type']}" + ) return None - - + # This function cannot be run atm , first download main self.uri def run_download_propertypaths(self): - """runs the download_propertypaths function for all propertypaths - """ + """runs the download_propertypaths function for all propertypaths""" for propertypath in self.propertypathmetadata: log.info(f"running download_propertypath for {propertypath}") - diff --git a/docker/dereferencer/dereferencer-py/dereferencer/graph.py b/docker/dereferencer/dereferencer-py/dereferencer/graph.py index d2de240..a73b960 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/graph.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/graph.py @@ -42,13 +42,18 @@ def uri_list(query): log.debug(f"uri_list: {query}") # Extract the variable from the SELECT clause - select_part = re.search('SELECT(.*)WHERE', query, re.IGNORECASE).group(1) + select_part = re.search("SELECT(.*)WHERE", query, re.IGNORECASE).group(1) variables = select_part.split() - - # Check that there is exactly one variable in the SELECT part of the SPARQL query + + # Check that there is exactly one variable in the SELECT part of the + # SPARQL query if len(variables) != 1: - log.error("There should be exactly one variable in the SELECT part of the SPARQL query") - raise AssertionError("There should be exactly one variable in the SELECT part of the SPARQL query") + log.error( + "There should be exactly one variable in the SELECT part of the SPARQL query" + ) + raise AssertionError( + "There should be exactly one variable in the SELECT part of the SPARQL query" + ) GDB.setQuery(query) GDB.setReturnFormat(JSON) @@ -57,4 +62,3 @@ def uri_list(query): # Use the extracted 
variable when getting the results return [result[var]["value"] for result in results["results"]["bindings"]] - diff --git a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py index ef455db..da40186 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_graphdb.py +++ b/docker/lwua-ingest/lwua-py/tests/test_graphdb.py @@ -6,7 +6,7 @@ suffix_2_format, read_graph, convert_results_registry_of_lastmod, -) +) from lwua.ingest import data_path_from_config import os @@ -56,9 +56,14 @@ def test_suffix_2_format(): def test_read_graph(): # Arrange - test_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test.ttl") - - fpath = os.path.join(os.path.dirname(os.path.realpath(__file__)), "test.ttl") + test_file_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "test.ttl" + ) + + fpath = os.path.join( + os.path.dirname( + os.path.realpath(__file__)), + "test.ttl") format = "turtle" # Act diff --git a/docker/lwua-ingest/lwua-py/tests/test_queries.py b/docker/lwua-ingest/lwua-py/tests/test_queries.py index 2a7b12a..d49a4c9 100644 --- a/docker/lwua-ingest/lwua-py/tests/test_queries.py +++ b/docker/lwua-ingest/lwua-py/tests/test_queries.py @@ -6,7 +6,6 @@ from .graphdb import URN_BASE - def get_j2rdf_builder(): template_folder = os.path.join( os.path.dirname(__file__), From 29417c6a097d8e19e1ccb1f79a718efd1983f2e7 Mon Sep 17 00:00:00 2001 From: cedricd Date: Fri, 1 Dec 2023 18:06:12 +0100 Subject: [PATCH 50/60] deleted metadata management for now in search of a more favorable system --- configs/dereference_mr_test.yml | 1 + ...erence_wrongSPARQL.yml => wrongSPARQL.yml} | 0 data/project.ttl | 2 +- .../dereferencer/derefEntity.py | 81 +----------------- .../dereferencer-py/dereferencer/graph.py | 84 +++++++++++++++++-- .../templates/lastmod_info.sparql | 9 ++ 6 files changed, 89 insertions(+), 88 deletions(-) diff --git a/configs/dereference_mr_test.yml b/configs/dereference_mr_test.yml index e454530..ee64e7e 100644 --- a/configs/dereference_mr_test.yml +++ b/configs/dereference_mr_test.yml @@ -1,6 +1,7 @@ SPARQL: > PREFIX dc: SELECT ?o + FROM WHERE { ?s dc:isVersionOf ?o . FILTER regex(str(?o), "marineregions.org") } diff --git a/configs/dereference_wrongSPARQL.yml b/configs/wrongSPARQL.yml similarity index 100% rename from configs/dereference_wrongSPARQL.yml rename to configs/wrongSPARQL.yml diff --git a/data/project.ttl b/data/project.ttl index 7c862d0..8ba941a 100644 --- a/data/project.ttl +++ b/data/project.ttl @@ -16,7 +16,7 @@ schema:name "Laurian van Maldeghem"^^xsd:string . a schema:Person ; - schema:name "Marque Portier"^^xsd:string . + schema:name "Marc Portier"^^xsd:string . a schema:Organization ; schema:name "VLIZ vzw"^^xsd:string . 
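Patch 50 strips the metadata bookkeeping out of derefEntity.py and leaves the downloader as its core; that download_uri logic reduces to one content-negotiated GET whose Content-Type response header decides the file extension. A condensed sketch of that pattern using requests follows; fetch_rdf and the EXT_BY_MIME table are illustrative names, and the real function additionally logs and rewrites the saved path:

import requests

# illustrative MIME-to-extension table; the codebase checks the same two types inline
EXT_BY_MIME = {"application/ld+json": ".json", "text/turtle": ".ttl"}

def fetch_rdf(uri: str, out_stem: str):
    # ask for JSON-LD or Turtle and let the response type pick the suffix
    resp = requests.get(uri, headers={"Accept": "application/ld+json, text/turtle"})
    if resp.status_code != 200:
        return None
    content_type = resp.headers.get("Content-Type", "")
    for mime, ext in EXT_BY_MIME.items():
        if mime in content_type:
            path = out_stem + ext
            with open(path, "w") as f:
                f.write(resp.text)
            return path
    return None  # reachable when the server ignores the Accept header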
diff --git a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py index 202d247..a3bf183 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py @@ -12,61 +12,15 @@ log = logging.getLogger(__name__) -def metadata_folder_from_config(): - local_default = str(resolve_path("./memory_dereferencer", versus="dotenv")) - folder_name = os.getenv("METADATA_FILES_FOLDER", local_default) - return folder_name - - def data_folder_from_config(): local_default = str(resolve_path("../data", versus="dotenv")) folder_name = os.getenv("DATA_FOLDER", local_default) return folder_name -def check_metadata_folder_exists(): - metadata_folder = metadata_folder_from_config() - if not os.path.exists(metadata_folder): - log.info( - f"metadata folder {metadata_folder} does not exist, creating it") - os.makedirs(metadata_folder) DATA_FOLDER = data_folder_from_config() -METADATA_FOLDER = metadata_folder_from_config() -METADATA_FILE = METADATA_FOLDER + "/metadata.json" - - -def make_metadata(input_list): - result = [] - - if input_list is None: - return result - - for item in input_list: - if isinstance(item, dict): - for key, values in item.items(): - if isinstance(values, list): - children = make_metadata(values) - result.append( - {key: {"completed": False, "children": children}}) - else: - result.append({values: {"completed": False}}) - elif isinstance(item, str): - result.append({item: {"completed": False}}) - - return result - - -def save_metadata(uri: str, data: object, completed: bool = False): - """saves the metadata to file""" - with open(METADATA_FILE, "r") as f: - # read the json file - current_metadata = json.load(f) - # replace the metadata for the uri with the new metadata - current_metadata[uri] = {"completed": completed, "metadata": data} - with open(METADATA_FILE, "w") as f: - json.dump(current_metadata, f) class DerefUriEntity: @@ -75,38 +29,9 @@ def __init__(self, uri: str, propertypaths: dict): self.propertypathmetadata = None self.propertypaths = propertypaths - check_metadata_folder_exists() - if not os.path.exists(METADATA_FILE): - log.info( - f"metadata file {METADATA_FILE} does not exist, creating it") - with open(METADATA_FILE, "w") as f: - json.dump({}, f) - else: - with open(METADATA_FILE, "r") as f: - self.metadata = json.load(f) - self.propertypathmetadata = self.get_metadata_uri(uri) - - log.debug(f"propertypathmetadata: {self.propertypathmetadata}") - - # if propertyMetadata is None then run function make metadata - if self.propertypathmetadata is None: - self.propertypathmetadata = make_metadata(self.propertypaths) - log.info(f"propertypathmetadata: {self.propertypathmetadata}") - save_metadata(self.uri, self.propertypathmetadata) self.download_uri(uri) - self.run_download_propertypaths() - - def get_metadata_uri(self, uri: str): - """checks if a given uri is in the metadata - - :param uri: uri to check - :type uri: str - :return: dict with the metadata for the uri - """ - if uri in self.metadata: - return self.metadata[uri]["metadata"] - else: - return None + if propertypaths is not None: + self.run_download_propertypaths() def download_uri(self, uri: str): """downloads the uri either in json-ld or ttl format and puts the result in the DATA_FOLDER @@ -145,5 +70,5 @@ def download_uri(self, uri: str): # This function cannot be run atm , first download main self.uri def run_download_propertypaths(self): """runs the 
download_propertypaths function for all propertypaths""" - for propertypath in self.propertypathmetadata: + for propertypath in self.propertypaths: log.info(f"running download_propertypath for {propertypath}") diff --git a/docker/dereferencer/dereferencer-py/dereferencer/graph.py b/docker/dereferencer/dereferencer-py/dereferencer/graph.py index a73b960..6c39bbc 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/graph.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/graph.py @@ -3,6 +3,7 @@ import logging from SPARQLWrapper import SPARQLWrapper, JSON +from urllib.parse import unquote, quote import logging import os import re @@ -10,6 +11,7 @@ # from dotenv import load_dotenv log = logging.getLogger(__name__) +URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") def gdb_from_config(): @@ -35,6 +37,72 @@ def gdb_from_config(): GDB = gdb_from_config() +def get_j2rdf_builder(): + template_folder = resolve_path("./lwua/templates") + log.info(f"template_folder == {template_folder}") + # init J2RDFSyntaxBuilder + j2rdf = J2RDFSyntaxBuilder( + templates_folder=template_folder, + extra_functions={"registry_of_lastmod_context": f"{URN_BASE}:ADMIN"}, + ) + return j2rdf + + +J2RDF = get_j2rdf_builder() + +def get_registry_of_lastmod(): + log.info(f"getting last modified graph") + + template = "lastmod_info.sparql" + vars = {} + query = J2RDF.build_syntax(template, **vars) + # log.debug(f"get_admin_graph query == {query}") + GDB.setQuery(query) + GDB.setReturnFormat(JSON) + results = GDB.query().convert() + + # convert {'head': {'vars': ['graph', 'lastmod']}, 'results': {'bindings': []}} to [{PosixPath('graph'): lastmod}] + # URI must be extracted from graph context and datetime str must be + # converted to epoch + + converted = {} + return convert_results_registry_of_lastmod(results) + +def convert_results_registry_of_lastmod(results): + converted = {} + for g in results["results"]["bindings"]: + path = context_2_fname(g["graph"]["value"]) + time = datetime.fromisoformat(g["lastmod"]["value"]) + converted[path] = time + return converted + +def context_2_fname(context: str): + """ + Convert a context to a filename path. + + :param context: The context to convert. + :type context: str + :return: The filename corresponding to the context. + :rtype: str + """ + assert context.startswith( + URN_BASE), f"Context {context} is not IRI compliant" + return unquote(context[len(URN_BASE) + 1:]) + + +def fname_2_context(fname: str): + """ + Convert a filename to a context. + + :param fname: The filename to convert. + :type fname: str + :return: The context corresponding to the filename. 
+ :rtype: str + """ + fname = str(fname) + return f"{URN_BASE}:{quote(fname)}" + + def uri_list(query): """ Return a list of URI's from a query @@ -42,18 +110,16 @@ def uri_list(query): log.debug(f"uri_list: {query}") # Extract the variable from the SELECT clause - select_part = re.search("SELECT(.*)WHERE", query, re.IGNORECASE).group(1) + select_part = re.search('SELECT(.*)WHERE', query, re.IGNORECASE).group(1) variables = select_part.split() - # Check that there is exactly one variable in the SELECT part of the - # SPARQL query + # Check that there is exactly one variable in the SELECT part of the SPARQL query if len(variables) != 1: - log.error( - "There should be exactly one variable in the SELECT part of the SPARQL query" - ) - raise AssertionError( - "There should be exactly one variable in the SELECT part of the SPARQL query" - ) + error_message = f"There should be exactly one variable in the SELECT part of the SPARQL query but found {len(variables)} in {variables}" + log.error(error_message) + raise ValueError(error_message) + + var = variables[0][1:] # remove the ? from the variable GDB.setQuery(query) GDB.setReturnFormat(JSON) diff --git a/docker/dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql b/docker/dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql new file mode 100644 index 0000000..62464ed --- /dev/null +++ b/docker/dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql @@ -0,0 +1,9 @@ +{# + This template is used to generate SPARQL SELECT queries. + This template takes 1 parameter: + - registry_of_lastmod_context: the context from which the data is to be selected +#} + +SELECT ?graph ?lastmod WHERE { + GRAPH <{{ registry_of_lastmod_context }}> { ?graph ?lastmod } +} \ No newline at end of file From f070d16a026baca58df6f77c44c6a32be360b47c Mon Sep 17 00:00:00 2001 From: cedricdcc Date: Fri, 1 Dec 2023 17:06:45 +0000 Subject: [PATCH 51/60] Automated python code formatting --- .../dereferencer-py/dereferencer/derefEntity.py | 2 -- .../dereferencer/dereferencer-py/dereferencer/graph.py | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py index a3bf183..8f0ded6 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py @@ -18,8 +18,6 @@ def data_folder_from_config(): return folder_name - - DATA_FOLDER = data_folder_from_config() diff --git a/docker/dereferencer/dereferencer-py/dereferencer/graph.py b/docker/dereferencer/dereferencer-py/dereferencer/graph.py index 6c39bbc..8281f56 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/graph.py +++ b/docker/dereferencer/dereferencer-py/dereferencer/graph.py @@ -50,6 +50,7 @@ def get_j2rdf_builder(): J2RDF = get_j2rdf_builder() + def get_registry_of_lastmod(): log.info(f"getting last modified graph") @@ -68,6 +69,7 @@ def get_registry_of_lastmod(): converted = {} return convert_results_registry_of_lastmod(results) + def convert_results_registry_of_lastmod(results): converted = {} for g in results["results"]["bindings"]: @@ -76,6 +78,7 @@ def convert_results_registry_of_lastmod(results): converted[path] = time return converted + def context_2_fname(context: str): """ Convert a context to a filename path. 
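To make the single-variable guard in uri_list concrete: the SELECT clause is located with a regular expression and must bind exactly one variable, whose leading '?' is then stripped when reading the bindings. A standalone sketch combining the guard above with the FROM-aware non-greedy regex that a later patch introduces; re.DOTALL is added here as an assumption of mine, because the bundled queries span multiple lines, which the in-repo pattern does not account for:

import re

def select_variable(query: str) -> str:
    # non-greedy match up to the first FROM or WHERE after SELECT
    match = re.search(r"SELECT(.*?)(FROM|WHERE)", query, re.IGNORECASE | re.DOTALL)
    variables = match.group(1).split()
    if len(variables) != 1:
        raise ValueError(
            f"expected exactly one variable in the SELECT clause, found {len(variables)} in {variables}"
        )
    return variables[0][1:]  # drop the leading '?'

query = """PREFIX dc: <http://purl.org/dc/terms/>
SELECT ?o
FROM <urn:lwua:INGEST:example>
WHERE { ?s dc:isVersionOf ?o }"""  # FROM target is an illustrative placeholder
print(select_variable(query))  # -> o

The returned name is what feeds result[var]["value"] when unpacking the SPARQLWrapper JSON bindings.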
@@ -110,16 +113,17 @@ def uri_list(query): log.debug(f"uri_list: {query}") # Extract the variable from the SELECT clause - select_part = re.search('SELECT(.*)WHERE', query, re.IGNORECASE).group(1) + select_part = re.search("SELECT(.*)WHERE", query, re.IGNORECASE).group(1) variables = select_part.split() - # Check that there is exactly one variable in the SELECT part of the SPARQL query + # Check that there is exactly one variable in the SELECT part of the + # SPARQL query if len(variables) != 1: error_message = f"There should be exactly one variable in the SELECT part of the SPARQL query but found {len(variables)} in {variables}" log.error(error_message) raise ValueError(error_message) - var = variables[0][1:] # remove the ? from the variable + var = variables[0][1:] # remove the ? from the variable GDB.setQuery(query) GDB.setReturnFormat(JSON) From 1b5963d5d12dfe99356618cf53656d4404a496a9 Mon Sep 17 00:00:00 2001 From: cedricd Date: Sat, 2 Dec 2023 00:12:24 +0100 Subject: [PATCH 52/60] working dereferencer --- configs/dereference_mr_test.yml | 3 + .../dereferencer/derefEntity.py | 72 ---------- docker/docker-compose.yml | 2 +- .../Dockerfile | 0 .../dereferencer-py/debug-logconf.yml | 0 .../dereferencer-py/dereferencer/__init__.py | 0 .../dereferencer-py/dereferencer/__main__.py | 0 .../dereferencer-py/dereferencer/daemon.py | 0 .../dereferencer/derefEntity.py | 129 ++++++++++++++++++ .../dereferencer/dereference.py | 0 .../dereferencer-py/dereferencer/graph.py | 32 ++++- .../dereferencer-py/dereferencer/helpers.py | 0 .../dereferencer-py/dereferencer/schedule.py | 0 .../templates/deref_property_path.sparql | 14 ++ .../templates/lastmod_info.sparql | 0 .../dereferencer-py/logging | 0 .../dereferencer-py/poetry.lock | 0 .../dereferencer-py/pyproject.toml | 0 18 files changed, 177 insertions(+), 75 deletions(-) delete mode 100644 docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py rename docker/{dereferencer => lwua-dereferencer}/Dockerfile (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/debug-logconf.yml (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/__init__.py (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/__main__.py (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/daemon.py (100%) create mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/dereference.py (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/graph.py (78%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/helpers.py (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/schedule.py (100%) create mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/dereferencer/templates/lastmod_info.sparql (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/logging (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/poetry.lock (100%) rename docker/{dereferencer => lwua-dereferencer}/dereferencer-py/pyproject.toml (100%) diff --git a/configs/dereference_mr_test.yml b/configs/dereference_mr_test.yml index ee64e7e..6ca13b0 100644 --- a/configs/dereference_mr_test.yml +++ b/configs/dereference_mr_test.yml @@ -10,3 +10,6 @@ property_paths: - 
http://marineregions.org/ns/ontology#hasGeometry - http://marineregions.org/ns/ontology#isPartOf: - http://marineregions.org/ns/ontology#hasGeometry + - https://schema.org/geo: + - https://schema.org/latitude + - https://schema.org/longitude diff --git a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py deleted file mode 100644 index 8f0ded6..0000000 --- a/docker/dereferencer/dereferencer-py/dereferencer/derefEntity.py +++ /dev/null @@ -1,72 +0,0 @@ -# This file will contain all the classes and functions needed to complete -# the dereferencing process - -import logging -from pathlib import Path -import json -import requests -import os - -from .helpers import resolve_path - -log = logging.getLogger(__name__) - - -def data_folder_from_config(): - local_default = str(resolve_path("../data", versus="dotenv")) - folder_name = os.getenv("DATA_FOLDER", local_default) - return folder_name - - -DATA_FOLDER = data_folder_from_config() - - -class DerefUriEntity: - def __init__(self, uri: str, propertypaths: dict): - self.uri = uri - self.propertypathmetadata = None - self.propertypaths = propertypaths - - self.download_uri(uri) - if propertypaths is not None: - self.run_download_propertypaths() - - def download_uri(self, uri: str): - """downloads the uri either in json-ld or ttl format and puts the result in the DATA_FOLDER - - :param uri: uri to download - :type uri: str - """ - log.info(f"downloading uri {uri}") - # perform request with accept header for json-ld or ttl - headers = {"Accept": "application/ld+json, text/turtle"} - r = requests.get(uri, headers=headers) - - # check if the request was successful and it returned a json-ld or ttl - # file - if r.status_code == 200 and ( - "application/ld+json" in r.headers["Content-Type"] - or "text/turtle" in r.headers["Content-Type"] - ): - # write the file to disk - # TODO: check if the file already exists - # check if the file is json-ld or ttl and add the correct extension - if "application/ld+json" in r.headers["Content-Type"]: - filename = DATA_FOLDER + "/" + uri.split("/")[-1] + ".json" - elif "text/turtle" in r.headers["Content-Type"]: - filename = DATA_FOLDER + "/" + uri.split("/")[-1] + ".ttl" - with open(filename, "w") as f: - f.write(r.text) - log.info(f"file saved to {filename}") - return filename - else: - log.warning( - f"request for {uri} failed with status code {r.status_code} and content type {r.headers['Content-Type']}" - ) - return None - - # This function cannot be run atm , first download main self.uri - def run_download_propertypaths(self): - """runs the download_propertypaths function for all propertypaths""" - for propertypath in self.propertypaths: - log.info(f"running download_propertypath for {propertypath}") diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 95e3743..64cb5ee 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -36,7 +36,7 @@ services: dereferencer: build: - context: ./dereferencer/ + context: ./lwua-dereferencer/ #args: image: lwua/dereferencer container_name: dereferencer diff --git a/docker/dereferencer/Dockerfile b/docker/lwua-dereferencer/Dockerfile similarity index 100% rename from docker/dereferencer/Dockerfile rename to docker/lwua-dereferencer/Dockerfile diff --git a/docker/dereferencer/dereferencer-py/debug-logconf.yml b/docker/lwua-dereferencer/dereferencer-py/debug-logconf.yml similarity index 100% rename from docker/dereferencer/dereferencer-py/debug-logconf.yml rename to 
docker/lwua-dereferencer/dereferencer-py/debug-logconf.yml diff --git a/docker/dereferencer/dereferencer-py/dereferencer/__init__.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/__init__.py similarity index 100% rename from docker/dereferencer/dereferencer-py/dereferencer/__init__.py rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/__init__.py diff --git a/docker/dereferencer/dereferencer-py/dereferencer/__main__.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/__main__.py similarity index 100% rename from docker/dereferencer/dereferencer-py/dereferencer/__main__.py rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/__main__.py diff --git a/docker/dereferencer/dereferencer-py/dereferencer/daemon.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/daemon.py similarity index 100% rename from docker/dereferencer/dereferencer-py/dereferencer/daemon.py rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/daemon.py diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py new file mode 100644 index 0000000..a457c72 --- /dev/null +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py @@ -0,0 +1,129 @@ +# This file will contain all the classes and functions needed to complete +# the dereferencing process + +import logging +from pathlib import Path +import json +import requests +import os + +from .helpers import resolve_path +from .graph import uri_list_from_deref_property_path + +log = logging.getLogger(__name__) + + +def data_folder_from_config(): + local_default = str(resolve_path("../data", versus="dotenv")) + folder_name = os.getenv("DATA_FOLDER", local_default) + return folder_name + + +def url_2_fname(url: str): + """ + Convert a URL to a filename. + + :param url: The URL to convert. + :type url: str + :return: The filename corresponding to the URL. 
+ :rtype: str + """ + return url.replace(":", "_").replace("/", "_").replace("?", "_").replace("&", "_").replace("=", "_").replace("#", "_") + + +DATA_FOLDER = data_folder_from_config() + + +def run_download_propertypaths(propertypaths, uri, file_name, download_uri): + """runs the download_propertypaths function for all propertypaths""" + for propertypath in propertypaths: + log.info(f"running download_propertypath for {propertypath}") + # if propertypath is a string, download it + if isinstance(propertypath, str): + log.info(f"propertypath is a string: {propertypath}, downloading") + to_download = uri_list_from_deref_property_path(uri, file_name, propertypath) + for uri in to_download: + if get_uri_downloaded(url_2_fname(uri)) is None: + download_uri(uri, url_2_fname(uri)) + if isinstance(propertypath, dict): + log.info(f"propertypath is a dict: {propertypath}, downloading") + # property path is the key of the dict + property_to_search = list(propertypath.keys())[0] + log.info(f"property_to_search: {property_to_search}") + + to_download = uri_list_from_deref_property_path(uri, file_name, property_to_search) + for uri in to_download: + file_name = get_uri_downloaded(url_2_fname(uri)) + if file_name is None: + file_name = download_uri(uri, url_2_fname(uri)) + uri = uri + propertypath = propertypath[property_to_search] + run_download_propertypaths(propertypath, uri, file_name, download_uri) + +def download_uri(uri: str, file_name: str): + """downloads the uri either in json-ld or ttl format and puts the result in the DATA_FOLDER + + :param uri: uri to download + :type uri: str + """ + log.info(f"downloading uri {uri}") + # perform request with accept header for json-ld or ttl + headers = {"Accept": "application/ld+json, text/turtle"} + r = requests.get(uri, headers=headers) + + # check if the request was successful and it returned a json-ld or ttl + # file + if r.status_code == 200 and ( + "application/ld+json" in r.headers["Content-Type"] + or "text/turtle" in r.headers["Content-Type"] + ): + # write the file to disk + # TODO: check if the file already exists + # check if the file is json-ld or ttl and add the correct extension + if "application/ld+json" in r.headers["Content-Type"]: + filename = DATA_FOLDER + "/" + file_name + ".json" + elif "text/turtle" in r.headers["Content-Type"]: + filename = DATA_FOLDER + "/" + file_name + ".ttl" + with open(filename, "w") as f: + f.write(r.text) + log.info(f"file saved to {filename}") + filename = filename.replace(DATA_FOLDER, "/data") + return filename + else: + log.warning( + f"request for {uri} failed with status code {r.status_code} and content type {r.headers['Content-Type']}" + ) + return None + +def get_uri_downloaded(file_name: str): + """gets the filename of the uri if it is already downloaded + + :param file_name: filename of the uri + :type file_name: str + :return: filename of the uri if it is already downloaded + :rtype: str + """ + + if os.path.isfile(DATA_FOLDER + "/" + file_name + ".json"): + filename = DATA_FOLDER + "/" + file_name + ".json" + return filename.replace(DATA_FOLDER, "/data") + if os.path.isfile(DATA_FOLDER + "/" + file_name + ".ttl"): + filename = DATA_FOLDER + "/" + file_name + ".ttl" + return filename.replace(DATA_FOLDER, "/data") + return None + + +class DerefUriEntity: + def __init__(self, uri: str, propertypaths: dict): + self.uri = uri + self.propertypathmetadata = None + self.propertypaths = propertypaths + self.file_name = url_2_fname(uri) + self.filename = get_uri_downloaded(self.file_name) + if self.filename 
is None: + log.info(f"uri {uri} not downloaded yet") + self.filename = download_uri(uri, self.file_name) + + if propertypaths is not None: + run_download_propertypaths(self.propertypaths, self.uri, self.filename, download_uri) + diff --git a/docker/dereferencer/dereferencer-py/dereferencer/dereference.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py similarity index 100% rename from docker/dereferencer/dereferencer-py/dereferencer/dereference.py rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py diff --git a/docker/dereferencer/dereferencer-py/dereferencer/graph.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py similarity index 78% rename from docker/dereferencer/dereferencer-py/dereferencer/graph.py rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py index 8281f56..094c757 100644 --- a/docker/dereferencer/dereferencer-py/dereferencer/graph.py +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py @@ -4,9 +4,11 @@ import logging from SPARQLWrapper import SPARQLWrapper, JSON from urllib.parse import unquote, quote +from pyrdfj2 import J2RDFSyntaxBuilder import logging import os import re +from .helpers import resolve_path # from dotenv import load_dotenv @@ -38,7 +40,7 @@ def gdb_from_config(): def get_j2rdf_builder(): - template_folder = resolve_path("./lwua/templates") + template_folder = resolve_path("./dereferencer/templates") log.info(f"template_folder == {template_folder}") # init J2RDFSyntaxBuilder j2rdf = J2RDFSyntaxBuilder( @@ -113,7 +115,7 @@ def uri_list(query): log.debug(f"uri_list: {query}") # Extract the variable from the SELECT clause - select_part = re.search("SELECT(.*)WHERE", query, re.IGNORECASE).group(1) + select_part = re.search('SELECT(.*?)(FROM|WHERE)', query, re.IGNORECASE).group(1) variables = select_part.split() # Check that there is exactly one variable in the SELECT part of the @@ -132,3 +134,29 @@ def uri_list(query): # Use the extracted variable when getting the results return [result[var]["value"] for result in results["results"]["bindings"]] + +def uri_list_from_deref_property_path(url,filename,propertypath): + """ + Return a list of URI's from a query + """ + log.debug(f"uri_list: {propertypath}") + + template = "deref_property_path.sparql" + + graph = fname_2_context(filename) + + vars = { + "graph": graph, + "subject": url, + "property": propertypath + } + + query = J2RDF.build_syntax(template, **vars) + log.debug(f"uri_list_from_deref_property_path query == {query}") + GDB.setQuery(query) + GDB.setReturnFormat(JSON) + results = GDB.query().convert() + log.debug(f"uri_list_from_deref_property_path results: {results}") + + # Use the extracted variable when getting the results + return [result["o"]["value"] for result in results["results"]["bindings"]] \ No newline at end of file diff --git a/docker/dereferencer/dereferencer-py/dereferencer/helpers.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/helpers.py similarity index 100% rename from docker/dereferencer/dereferencer-py/dereferencer/helpers.py rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/helpers.py diff --git a/docker/dereferencer/dereferencer-py/dereferencer/schedule.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/schedule.py similarity index 100% rename from docker/dereferencer/dereferencer-py/dereferencer/schedule.py rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/schedule.py diff --git 
a/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql b/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql new file mode 100644 index 0000000..98e345a --- /dev/null +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql @@ -0,0 +1,14 @@ +{# + This Template is used to make a SPARQL query that will return the objects of a given property + This template takes in 3 parameters: + - The URI of the property + - The URI of the subject + - The URI of the graph +#} + +SELECT ?o +WHERE { + GRAPH <{{ graph }}> { + <{{ subject }}> <{{ property }}> ?o . + } +} \ No newline at end of file diff --git a/docker/dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql b/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql similarity index 100% rename from docker/dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql rename to docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql diff --git a/docker/dereferencer/dereferencer-py/logging b/docker/lwua-dereferencer/dereferencer-py/logging similarity index 100% rename from docker/dereferencer/dereferencer-py/logging rename to docker/lwua-dereferencer/dereferencer-py/logging diff --git a/docker/dereferencer/dereferencer-py/poetry.lock b/docker/lwua-dereferencer/dereferencer-py/poetry.lock similarity index 100% rename from docker/dereferencer/dereferencer-py/poetry.lock rename to docker/lwua-dereferencer/dereferencer-py/poetry.lock diff --git a/docker/dereferencer/dereferencer-py/pyproject.toml b/docker/lwua-dereferencer/dereferencer-py/pyproject.toml similarity index 100% rename from docker/dereferencer/dereferencer-py/pyproject.toml rename to docker/lwua-dereferencer/dereferencer-py/pyproject.toml From 9188dccf78a99fbc184b1518085eb258f56ce2ef Mon Sep 17 00:00:00 2001 From: cedricd Date: Sat, 2 Dec 2023 00:20:25 +0100 Subject: [PATCH 53/60] fixed linting workflow --- .github/workflows/linting-python-files.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/linting-python-files.yml b/.github/workflows/linting-python-files.yml index 1e2bbbf..4743418 100644 --- a/.github/workflows/linting-python-files.yml +++ b/.github/workflows/linting-python-files.yml @@ -8,7 +8,7 @@ on: pull_request: paths: - 'docker/lwua-ingest/**/*.py' - - 'docker/dereferencer/**/*.py' + - 'docker/lwua-dereferencer/**/*.py' jobs: lint: @@ -30,12 +30,12 @@ jobs: - name: Run Black run: | black docker/lwua-ingest/ - black docker/dereferencer/ + black docker/lwua-dereferencer/ - name: Run autopep8 run: | autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/lwua-ingest/ - autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/dereferencer/ + autopep8 --in-place --aggressive --aggressive --max-line-length 79 --recursive docker/lwua-dereferencer/ - name: Commit and push changes run: | From 0f5714a51c2226e6d4edecc4d73967837e199f38 Mon Sep 17 00:00:00 2001 From: cedricdcc <30471340+cedricdcc@users.noreply.github.com> Date: Sat, 2 Dec 2023 00:23:54 +0100 Subject: [PATCH 54/60] Update derefEntity.py --- .../dereferencer-py/dereferencer/derefEntity.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py index a457c72..08f6f7f 100644 --- 
a/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py @@ -34,6 +34,7 @@ def url_2_fname(url: str): DATA_FOLDER = data_folder_from_config() + def run_download_propertypaths(propertypaths, uri, file_name, download_uri): """runs the download_propertypaths function for all propertypaths""" for propertypath in propertypaths: From dec05f68b6abaa154cb9f3543798225f11a664b4 Mon Sep 17 00:00:00 2001 From: cedricd Date: Sat, 2 Dec 2023 00:25:20 +0100 Subject: [PATCH 55/60] wf-update --- .github/workflows/linting-python-files.yml | 2 +- .../dereferencer-py/dereferencer/dereference.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/linting-python-files.yml b/.github/workflows/linting-python-files.yml index 4743418..9f3cffb 100644 --- a/.github/workflows/linting-python-files.yml +++ b/.github/workflows/linting-python-files.yml @@ -4,7 +4,7 @@ on: push: paths: - 'docker/lwua-ingest/**/*.py' - - 'docker/dereferencer/**/*.py' + - 'docker/lwua-dereferencer/**/*.py' pull_request: paths: - 'docker/lwua-ingest/**/*.py' diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py index c7afeac..93bfb75 100644 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py @@ -12,7 +12,6 @@ log = logging.getLogger(__name__) - def config_path_from_config(): local_default = str(resolve_path("../configs", versus="dotenv")) folder_name = os.getenv("CONFIG_FILES_FOLDER", local_default) From cc35c2bb177ae33ba9944ef41d52b47719e4a096 Mon Sep 17 00:00:00 2001 From: cedricdcc Date: Fri, 1 Dec 2023 23:25:46 +0000 Subject: [PATCH 56/60] Automated python code formatting --- .../dereferencer/derefEntity.py | 36 +++++++++++++------ .../dereferencer/dereference.py | 1 + .../dereferencer-py/dereferencer/graph.py | 24 ++++++------- 3 files changed, 38 insertions(+), 23 deletions(-) diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py index 08f6f7f..0521c17 100644 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py @@ -28,13 +28,19 @@ def url_2_fname(url: str): :return: The filename corresponding to the URL. 
:rtype: str """ - return url.replace(":", "_").replace("/", "_").replace("?", "_").replace("&", "_").replace("=", "_").replace("#", "_") + return ( + url.replace(":", "_") + .replace("/", "_") + .replace("?", "_") + .replace("&", "_") + .replace("=", "_") + .replace("#", "_") + ) DATA_FOLDER = data_folder_from_config() - def run_download_propertypaths(propertypaths, uri, file_name, download_uri): """runs the download_propertypaths function for all propertypaths""" for propertypath in propertypaths: @@ -42,7 +48,9 @@ def run_download_propertypaths(propertypaths, uri, file_name, download_uri): # if propertypath is a string, download it if isinstance(propertypath, str): log.info(f"propertypath is a string: {propertypath}, downloading") - to_download = uri_list_from_deref_property_path(uri, file_name, propertypath) + to_download = uri_list_from_deref_property_path( + uri, file_name, propertypath + ) for uri in to_download: if get_uri_downloaded(url_2_fname(uri)) is None: download_uri(uri, url_2_fname(uri)) @@ -51,16 +59,20 @@ def run_download_propertypaths(propertypaths, uri, file_name, download_uri): # property path is the key of the dict property_to_search = list(propertypath.keys())[0] log.info(f"property_to_search: {property_to_search}") - - to_download = uri_list_from_deref_property_path(uri, file_name, property_to_search) + + to_download = uri_list_from_deref_property_path( + uri, file_name, property_to_search + ) for uri in to_download: file_name = get_uri_downloaded(url_2_fname(uri)) if file_name is None: file_name = download_uri(uri, url_2_fname(uri)) uri = uri propertypath = propertypath[property_to_search] - run_download_propertypaths(propertypath, uri, file_name, download_uri) - + run_download_propertypaths( + propertypath, uri, file_name, download_uri) + + def download_uri(uri: str, file_name: str): """downloads the uri either in json-ld or ttl format and puts the result in the DATA_FOLDER @@ -96,6 +108,7 @@ def download_uri(uri: str, file_name: str): ) return None + def get_uri_downloaded(file_name: str): """gets the filename of the uri if it is already downloaded @@ -104,7 +117,7 @@ def get_uri_downloaded(file_name: str): :return: filename of the uri if it is already downloaded :rtype: str """ - + if os.path.isfile(DATA_FOLDER + "/" + file_name + ".json"): filename = DATA_FOLDER + "/" + file_name + ".json" return filename.replace(DATA_FOLDER, "/data") @@ -124,7 +137,8 @@ def __init__(self, uri: str, propertypaths: dict): if self.filename is None: log.info(f"uri {uri} not downloaded yet") self.filename = download_uri(uri, self.file_name) - + if propertypaths is not None: - run_download_propertypaths(self.propertypaths, self.uri, self.filename, download_uri) - + run_download_propertypaths( + self.propertypaths, self.uri, self.filename, download_uri + ) diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py index 93bfb75..c7afeac 100644 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py @@ -12,6 +12,7 @@ log = logging.getLogger(__name__) + def config_path_from_config(): local_default = str(resolve_path("../configs", versus="dotenv")) folder_name = os.getenv("CONFIG_FILES_FOLDER", local_default) diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py index 094c757..b7df1c6 100644 --- 
a/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py +++ b/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py @@ -115,7 +115,10 @@ def uri_list(query): log.debug(f"uri_list: {query}") # Extract the variable from the SELECT clause - select_part = re.search('SELECT(.*?)(FROM|WHERE)', query, re.IGNORECASE).group(1) + select_part = re.search( + "SELECT(.*?)(FROM|WHERE)", + query, + re.IGNORECASE).group(1) variables = select_part.split() # Check that there is exactly one variable in the SELECT part of the @@ -135,28 +138,25 @@ def uri_list(query): # Use the extracted variable when getting the results return [result[var]["value"] for result in results["results"]["bindings"]] -def uri_list_from_deref_property_path(url,filename,propertypath): + +def uri_list_from_deref_property_path(url, filename, propertypath): """ Return a list of URI's from a query """ log.debug(f"uri_list: {propertypath}") template = "deref_property_path.sparql" - + graph = fname_2_context(filename) - - vars = { - "graph": graph, - "subject": url, - "property": propertypath - } - + + vars = {"graph": graph, "subject": url, "property": propertypath} + query = J2RDF.build_syntax(template, **vars) log.debug(f"uri_list_from_deref_property_path query == {query}") GDB.setQuery(query) GDB.setReturnFormat(JSON) results = GDB.query().convert() log.debug(f"uri_list_from_deref_property_path results: {results}") - + # Use the extracted variable when getting the results - return [result["o"]["value"] for result in results["results"]["bindings"]] \ No newline at end of file + return [result["o"]["value"] for result in results["results"]["bindings"]] From bbf25ef2742e1c014ba754d813a00fe294313da4 Mon Sep 17 00:00:00 2001 From: cedricd Date: Mon, 4 Dec 2023 15:08:34 +0100 Subject: [PATCH 57/60] Revert "Automated code formatting" This reverts commit c6246c8253d8b19b3cca138902be9ca462135a21. --- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 6 ++---- docker/lwua-ingest/lwua-py/lwua/ingest.py | 4 +--- docker/lwua-ingest/lwua-py/lwua/schedule.py | 7 ++++--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index f8d4583..ba552b9 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -187,11 +187,9 @@ def context_2_fname(context: str): :return: The filename corresponding to the context. :rtype: str """ - assert context.startswith( - URN_BASE), f"Context {context} is not IRI compliant" + assert context.startswith(URN_BASE), f"Context {context} is not IRI compliant" return unquote(context[len(URN_BASE) + 1:]) - def fname_2_context(fname: str): """ Convert a filename to a context. 
@@ -233,7 +231,7 @@ def convert_results_registry_of_lastmod(results): return converted -def format_from_filepath(fpath: Path): +def format_from_filepath(fpath:Path): suffix = fpath.suffix[1:].lower() if suffix in ["ttl", "turtle"]: return "turtle" diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index f1c3fb4..ed44444 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -13,7 +13,7 @@ ingest_graph, update_registry_lastmod, read_graph, - fname_2_context, + fname_2_context ) @@ -22,7 +22,6 @@ # functions here to ingest and delete files - def delete_data_file(fname): context = fname_2_context(fname) log.info(f"deleting {fname} from {context}") @@ -53,7 +52,6 @@ def data_path_from_config(): folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) return Path(folder_name).absolute() - class Ingester: def __init__(self): data_path = data_path_from_config() diff --git a/docker/lwua-ingest/lwua-py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py index eb67cba..c2c2378 100644 --- a/docker/lwua-ingest/lwua-py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -19,20 +19,21 @@ def __init__(self, run_on_start: bool = True): # get the waittime before starting the scheduler waittime = os.getenv("SCHEDULER_WAIT", "10") time.sleep(int(waittime)) - + super().__init__() self._run_on_start = run_on_start self.add_job(lambda: self.main_schedule(), "interval", **timeprops) def start(self): try: - self.ingester = Ingester() + self.ingester = Ingester() if self._run_on_start: self.main_schedule() super().start() except (KeyboardInterrupt, SystemExit): log.info("execution interrupted") - + def main_schedule(self): log.info("starting main service flow") self.ingester.run_ingest() + From 7cac5e228aa5a35f79856c8c2350da6e69c302aa Mon Sep 17 00:00:00 2001 From: cedricdcc Date: Mon, 4 Dec 2023 14:09:07 +0000 Subject: [PATCH 58/60] Automated python code formatting --- docker/lwua-ingest/lwua-py/lwua/graphdb.py | 6 ++++-- docker/lwua-ingest/lwua-py/lwua/ingest.py | 4 +++- docker/lwua-ingest/lwua-py/lwua/schedule.py | 7 +++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docker/lwua-ingest/lwua-py/lwua/graphdb.py b/docker/lwua-ingest/lwua-py/lwua/graphdb.py index ba552b9..f8d4583 100644 --- a/docker/lwua-ingest/lwua-py/lwua/graphdb.py +++ b/docker/lwua-ingest/lwua-py/lwua/graphdb.py @@ -187,9 +187,11 @@ def context_2_fname(context: str): :return: The filename corresponding to the context. :rtype: str """ - assert context.startswith(URN_BASE), f"Context {context} is not IRI compliant" + assert context.startswith( + URN_BASE), f"Context {context} is not IRI compliant" return unquote(context[len(URN_BASE) + 1:]) + def fname_2_context(fname: str): """ Convert a filename to a context. 
@@ -231,7 +233,7 @@ def convert_results_registry_of_lastmod(results): return converted -def format_from_filepath(fpath:Path): +def format_from_filepath(fpath: Path): suffix = fpath.suffix[1:].lower() if suffix in ["ttl", "turtle"]: return "turtle" diff --git a/docker/lwua-ingest/lwua-py/lwua/ingest.py b/docker/lwua-ingest/lwua-py/lwua/ingest.py index ed44444..f1c3fb4 100644 --- a/docker/lwua-ingest/lwua-py/lwua/ingest.py +++ b/docker/lwua-ingest/lwua-py/lwua/ingest.py @@ -13,7 +13,7 @@ ingest_graph, update_registry_lastmod, read_graph, - fname_2_context + fname_2_context, ) @@ -22,6 +22,7 @@ # functions here to ingest and delete files + def delete_data_file(fname): context = fname_2_context(fname) log.info(f"deleting {fname} from {context}") @@ -52,6 +53,7 @@ def data_path_from_config(): folder_name = os.getenv("INGEST_DATA_FOLDER", local_default) return Path(folder_name).absolute() + class Ingester: def __init__(self): data_path = data_path_from_config() diff --git a/docker/lwua-ingest/lwua-py/lwua/schedule.py b/docker/lwua-ingest/lwua-py/lwua/schedule.py index c2c2378..eb67cba 100644 --- a/docker/lwua-ingest/lwua-py/lwua/schedule.py +++ b/docker/lwua-ingest/lwua-py/lwua/schedule.py @@ -19,21 +19,20 @@ def __init__(self, run_on_start: bool = True): # get the waittime before starting the scheduler waittime = os.getenv("SCHEDULER_WAIT", "10") time.sleep(int(waittime)) - + super().__init__() self._run_on_start = run_on_start self.add_job(lambda: self.main_schedule(), "interval", **timeprops) def start(self): try: - self.ingester = Ingester() + self.ingester = Ingester() if self._run_on_start: self.main_schedule() super().start() except (KeyboardInterrupt, SystemExit): log.info("execution interrupted") - + def main_schedule(self): log.info("starting main service flow") self.ingester.run_ingest() - From 19f5a97064387ef4ec1f26e0ce43850d3bb1e235 Mon Sep 17 00:00:00 2001 From: cedricd Date: Mon, 4 Dec 2023 15:10:32 +0100 Subject: [PATCH 59/60] no deref --- configs/dereference_mr_test.yml | 15 - configs/dereference_test.yml | 8 - configs/wrongSPARQL.yml | 8 - data/mr_regions_ldes_test.ttl | 89 --- docker/lwua-dereferencer/Dockerfile | 38 -- .../dereferencer-py/debug-logconf.yml | 35 - .../dereferencer-py/dereferencer/__init__.py | 4 - .../dereferencer-py/dereferencer/__main__.py | 45 -- .../dereferencer-py/dereferencer/daemon.py | 152 ----- .../dereferencer/derefEntity.py | 144 ----- .../dereferencer/dereference.py | 57 -- .../dereferencer-py/dereferencer/graph.py | 162 ----- .../dereferencer-py/dereferencer/helpers.py | 77 --- .../dereferencer-py/dereferencer/schedule.py | 41 -- .../templates/deref_property_path.sparql | 14 - .../templates/lastmod_info.sparql | 9 - .../lwua-dereferencer/dereferencer-py/logging | 1 - .../dereferencer-py/poetry.lock | 596 ------------------ .../dereferencer-py/pyproject.toml | 24 - 19 files changed, 1519 deletions(-) delete mode 100644 configs/dereference_mr_test.yml delete mode 100644 configs/dereference_test.yml delete mode 100644 configs/wrongSPARQL.yml delete mode 100644 data/mr_regions_ldes_test.ttl delete mode 100644 docker/lwua-dereferencer/Dockerfile delete mode 100644 docker/lwua-dereferencer/dereferencer-py/debug-logconf.yml delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/__init__.py delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/__main__.py delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/daemon.py delete mode 100644 
docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/helpers.py delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/schedule.py delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql delete mode 100644 docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql delete mode 100644 docker/lwua-dereferencer/dereferencer-py/logging delete mode 100644 docker/lwua-dereferencer/dereferencer-py/poetry.lock delete mode 100644 docker/lwua-dereferencer/dereferencer-py/pyproject.toml diff --git a/configs/dereference_mr_test.yml b/configs/dereference_mr_test.yml deleted file mode 100644 index 6ca13b0..0000000 --- a/configs/dereference_mr_test.yml +++ /dev/null @@ -1,15 +0,0 @@ -SPARQL: > - PREFIX dc: - SELECT ?o - FROM - WHERE { - ?s dc:isVersionOf ?o . - FILTER regex(str(?o), "marineregions.org") - } -property_paths: - - http://marineregions.org/ns/ontology#hasGeometry - - http://marineregions.org/ns/ontology#isPartOf: - - http://marineregions.org/ns/ontology#hasGeometry - - https://schema.org/geo: - - https://schema.org/latitude - - https://schema.org/longitude diff --git a/configs/dereference_test.yml b/configs/dereference_test.yml deleted file mode 100644 index 2c86d30..0000000 --- a/configs/dereference_test.yml +++ /dev/null @@ -1,8 +0,0 @@ -SPARQL: > - PREFIX rdf: - PREFIX schema: - SELECT ?s - WHERE { - ?s rdf:type schema:Person . - } -property_paths: \ No newline at end of file diff --git a/configs/wrongSPARQL.yml b/configs/wrongSPARQL.yml deleted file mode 100644 index 903d9d1..0000000 --- a/configs/wrongSPARQL.yml +++ /dev/null @@ -1,8 +0,0 @@ -SPARQL: > - PREFIX rdf: - PREFIX schema: - SELECT ?s ?o - WHERE { - ?s rdf:type ?o . - } -property_paths: \ No newline at end of file diff --git a/data/mr_regions_ldes_test.ttl b/data/mr_regions_ldes_test.ttl deleted file mode 100644 index 4a9da1d..0000000 --- a/data/mr_regions_ldes_test.ttl +++ /dev/null @@ -1,89 +0,0 @@ -@prefix tree: . -@prefix ldes: . -@prefix dc: . -@prefix sh: . -@prefix xsd: . -@prefix skos: . -@prefix gsp: . -@prefix dcat: . -@prefix mr: . -@prefix schema: . -@prefix rdf: . - - - a tree:Node ; - tree:relation [ tree:node ] ; - ldes:retentionPolicy [ - a ldes:LatestVersionSubset ; - ldes:amount 1 ; - ldes:versionKey ( dc:isVersionOf ) - ] . 
- - - a ldes:EventStream ; - tree:shape [ - a sh:NodeShape ; - sh:nodeKind sh:IRI ; - sh:property [ - sh:datatype xsd:dateTime ; - sh:minCount 1 ; - sh:path dc:modified - ], [ - sh:minCount 1 ; - sh:nodeKind sh:IRI ; - sh:path dc:isVersionOf - ], [ sh:path skos:note ], [ sh:path skos:historyNote ], [ - sh:datatype gsp:wktLiteral ; - sh:maxCount 1 ; - sh:minCount 1 ; - sh:path dcat:centroid - ], [ - sh:datatype gsp:wktLiteral ; - sh:maxCount 1 ; - sh:minCount 0 ; - sh:path dcat:bbox - ], [ - sh:minCount 0 ; - sh:nodekind sh:IRI ; - sh:path mr:hasGeometry - ], [ - sh:minCount 0 ; - sh:node [ - a sh:NodeShape ; - sh:nodeKind sh:IRI ; - sh:property [ - sh:class schema:PropertyValue ; - sh:maxCount 1 ; - sh:minCount 1 ; - sh:path schema:identifier - ], [ - sh:maxCount 1 ; - sh:minCount 1 ; - sh:nodeKind sh:IRI ; - sh:path schema:url - ] - ] ; - sh:path skos:exactMatch - ], [ - sh:datatype rdf:langString ; - sh:minCount 1 ; - sh:path skos:prefLabel - ], [ - sh:datatype rdf:langString ; - sh:minCount 0 ; - sh:path skos:altLabel - ], [ - sh:class mr:MRGeoObject ; - sh:minCount 0 ; - sh:nodeKind sh:IRI ; - sh:path mr:isRelatedTo - ] ; - sh:targetClass mr:MRGeoObject - ] ; - tree:view ; - tree:member . - - - dc:isVersionOf ; - dc:modified "2023-11-28T10:02:22Z"^^xsd:dateTime . - diff --git a/docker/lwua-dereferencer/Dockerfile b/docker/lwua-dereferencer/Dockerfile deleted file mode 100644 index 9d95cdf..0000000 --- a/docker/lwua-dereferencer/Dockerfile +++ /dev/null @@ -1,38 +0,0 @@ -FROM python:3.11-slim as python -VOLUME /data -VOLUME /logging -WORKDIR /dereferencer-py -ENV PYTHONUNBUFFERED=true - -# check for inspiration on including poetry -# - https://hub.docker.com/r/airdock/python-poetry -# - https://binx.io/nl/2022/06/13/poetry-docker/ - - -# create an image version point where poetry and its dependencies are available -# and use that to build the python package locally -FROM python as poetry -# gcc needed in the build of many python dependencies -# removed from python-slim for size trimming - but have to re-add here -RUN apt-get update -y && apt-get upgrade -y && apt-get install -y gcc -ENV POETRY_HOME=/opt/poetry -ENV POETRY_VIRTUALENVS_IN_PROJECT=true -ENV PATH="$POETRY_HOME/bin:$PATH" -RUN python -c 'from urllib.request import urlopen; print(urlopen("https://install.python-poetry.org").read().decode())' | python - - -# get the source code in -COPY ./dereferencer-py /dereferencer-py -# use poetry to build and install -- creating the local .venv -RUN poetry install --no-interaction --no-ansi -vvv - - -# now go back to the original slim image to build the runtime image -# and just grab the build env from the intermediate stage -FROM python as runtime -# ensure the .venv python is used -ENV PATH="/dereferencer-py/.venv/bin:$PATH" -# ensure we have the build folder from the poetry stage of this image -COPY --from=poetry /dereferencer-py /dereferencer-py - -RUN rm -f /dereferencer-py/logging && ln -s /logging /dereferencer-py/logging -ENTRYPOINT ["python", "-m", "dereferencer"] \ No newline at end of file diff --git a/docker/lwua-dereferencer/dereferencer-py/debug-logconf.yml b/docker/lwua-dereferencer/dereferencer-py/debug-logconf.yml deleted file mode 100644 index 295ac9b..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/debug-logconf.yml +++ /dev/null @@ -1,35 +0,0 @@ -# new yaml format for python logging config -# see https://docs.python.org/3/library/logging.config.html for description of dictConfig -version: 1 -formatters: - base: - format: '%(asctime)-18s @%(name)-23s 
[%(levelname)-8s] %(message)s'
-    datefmt: '%Y-%m-%d %H:%M:%S'
-handlers:
-  stderr:
-    class: logging.StreamHandler
-    level: DEBUG
-    formatter: base
-    stream: ext://sys.stderr
-  file:
-    class: logging.FileHandler
-    level: DEBUG
-    formatter: base
-    filename: logging/dereferencer-debug.log
-    mode: 'a' # instead of 'w'
-loggers:
-  __main__:
-    level: DEBUG
-    propagate: yes
-  tests:
-    level: DEBUG
-    propagate: yes
-  dereferencer:
-    level: DEBUG
-    propagate: yes
-  apscheduler:
-    level: WARN
-    propagate: yes
-root:
-  level: DEBUG
-  handlers: [stderr, file]
diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/__init__.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/__init__.py
deleted file mode 100644
index f9d322f..0000000
--- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-__all__ = ["DereferenceScheduler"]
-
-# Add scheduler path here
-from .schedule import DereferenceScheduler
diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/__main__.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/__main__.py
deleted file mode 100644
index ac12b2e..0000000
--- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/__main__.py
+++ /dev/null
@@ -1,45 +0,0 @@
-""" LWUAIngest service entry-point
--- support service-like commands start/stop/status/reload
--- will start the cron-service-dispatch and the web UI endpoint
-"""
-from .daemon import Daemon
-from .helpers import enable_logging, resolve_path
-from dotenv import load_dotenv
-from dereferencer import DereferenceScheduler
-import sys
-import logging
-
-log = logging.getLogger(__name__)
-
-
-class IngestDaemon(Daemon):
-    def run(self):
-        try:
-            # setup
-            log.info("setting up")
-            scheduler: DereferenceScheduler = DereferenceScheduler()
-
-            # action
-            log.info("starting schedule")
-            scheduler.start()
-
-        except Exception as e:
-            log.exception(e)
-        finally:
-            # teardown
-            log.info("teardown")
-
-
-def main():
-    load_dotenv()
-    enable_logging()
-
-    pidfilename: str = "dereferencer-daemon.pid"
-    # double dirname ends at parent!
-    pidfile: str = resolve_path(pidfilename)
-
-    IngestDaemon(pidfile)._cmd(sys.argv)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/daemon.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/daemon.py
deleted file mode 100644
index 24055b8..0000000
--- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/daemon.py
+++ /dev/null
@@ -1,152 +0,0 @@
-""" Daemon wrapper script
---> https://stackoverflow.com/questions/1603109/how-to-make-a-python-script-run-like-a-service-or-daemon-in-linux
---> https://web.archive.org/web/20160320091458/http://www.jejik.com/files/examples/daemon3x.py
-"""
-import sys
-import os
-import time
-import atexit
-import signal
-from abc import ABC, abstractmethod
-import logging
-
-
-log = logging.getLogger(__name__)
-
-
-class Daemon(ABC):
-    """A generic daemon class.
-    Usage: subclass the daemon class and override the run() method.
-    """
-
-    def __init__(self, pidfile):
-        self.pidfile = str(pidfile)
-
-    def daemonize(self):
-        """Daemonize class. 
UNIX double fork mechanism.""" - - try: - pid = os.fork() - if pid > 0: - # exit first parent - sys.exit(0) - except OSError as err: - log.exception(f"fork #1 failed: {err}") - sys.exit(1) - - # decouple from parent environment - os.chdir("/") - os.setsid() - os.umask(0) - - # do second fork - try: - pid = os.fork() - if pid > 0: - # exit from second parent - sys.exit(0) - except OSError as err: - log.exception(f"fork #2 failed: {err}") - sys.exit(1) - - # redirect standard file descriptors - sys.stdout.flush() - sys.stderr.flush() - si = open(os.devnull, "r") - so = open(os.devnull, "a+") - se = open(os.devnull, "a+") - - os.dup2(si.fileno(), sys.stdin.fileno()) - os.dup2(so.fileno(), sys.stdout.fileno()) - os.dup2(se.fileno(), sys.stderr.fileno()) - - # write pidfile - atexit.register(self.delpid) - - pid = str(os.getpid()) - log.info(f"creating pidfile {self.pidfile} - containing {pid}") - with open(self.pidfile, "w+") as f: - f.write(pid + "\n") - - def delpid(self): - log.info("atexit -- service run completed -- removing pidfile") - os.remove(self.pidfile) - - def start(self): - """Start the daemon.""" - - # Check for a pidfile to see if the daemon already runs - try: - with open(self.pidfile, "r") as pf: - pid = int(pf.read().strip()) - except IOError: - pid = None - - if pid: - message = f"pidfile {self.pidfile} already exist. Daemon already running?\n" - sys.stderr.write(message) - sys.exit(1) - - # Start the daemon - self.daemonize() - self.run() - - def stop(self): - """Stop the daemon.""" - - # Get the pid from the pidfile - try: - with open(self.pidfile, "r") as pf: - pid = int(pf.read().strip()) - except IOError: - pid = None - - if not pid: - message = f"pidfile {self.pidfile} does not exist. Daemon not running?\n" - sys.stderr.write(message) - return # not an error in a restart - - # Try killing the daemon process - - try: - while True: - os.kill(pid, signal.SIGTERM) - time.sleep(0.1) - except OSError as err: - e = str(err.args) - if e.find("No such process") > 0: - if os.path.exists(self.pidfile): - os.remove(self.pidfile) - else: - print(str(err.args)) - sys.exit(1) - - def restart(self): - """Restart the daemon.""" - self.stop() - self.start() - - @abstractmethod - def run(self): - """You should override this method when you subclass Daemon. - - It will be called after the process has been daemonized by - start() or restart(). 
- """ - - CMDS = ["start", "stop", "restart", "run"] - - def _usage(self): - print( - f"run this daemon script with one argument == {'|'.join(Daemon.CMDS)}") - - def _cmd(self, argv): - if len(argv) != 2: - log.warning(f"daemon started with cmdline ==> {argv}") - return self._usage() - # else - cmd = argv[1] - if cmd not in Daemon.CMDS: - return self._usage() - # else - self.__getattribute__(cmd)() diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py deleted file mode 100644 index 0521c17..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/derefEntity.py +++ /dev/null @@ -1,144 +0,0 @@ -# This file will contain all the classes and functions needed to complete -# the dereferencing process - -import logging -from pathlib import Path -import json -import requests -import os - -from .helpers import resolve_path -from .graph import uri_list_from_deref_property_path - -log = logging.getLogger(__name__) - - -def data_folder_from_config(): - local_default = str(resolve_path("../data", versus="dotenv")) - folder_name = os.getenv("DATA_FOLDER", local_default) - return folder_name - - -def url_2_fname(url: str): - """ - Convert a URL to a filename. - - :param url: The URL to convert. - :type url: str - :return: The filename corresponding to the URL. - :rtype: str - """ - return ( - url.replace(":", "_") - .replace("/", "_") - .replace("?", "_") - .replace("&", "_") - .replace("=", "_") - .replace("#", "_") - ) - - -DATA_FOLDER = data_folder_from_config() - - -def run_download_propertypaths(propertypaths, uri, file_name, download_uri): - """runs the download_propertypaths function for all propertypaths""" - for propertypath in propertypaths: - log.info(f"running download_propertypath for {propertypath}") - # if propertypath is a string, download it - if isinstance(propertypath, str): - log.info(f"propertypath is a string: {propertypath}, downloading") - to_download = uri_list_from_deref_property_path( - uri, file_name, propertypath - ) - for uri in to_download: - if get_uri_downloaded(url_2_fname(uri)) is None: - download_uri(uri, url_2_fname(uri)) - if isinstance(propertypath, dict): - log.info(f"propertypath is a dict: {propertypath}, downloading") - # property path is the key of the dict - property_to_search = list(propertypath.keys())[0] - log.info(f"property_to_search: {property_to_search}") - - to_download = uri_list_from_deref_property_path( - uri, file_name, property_to_search - ) - for uri in to_download: - file_name = get_uri_downloaded(url_2_fname(uri)) - if file_name is None: - file_name = download_uri(uri, url_2_fname(uri)) - uri = uri - propertypath = propertypath[property_to_search] - run_download_propertypaths( - propertypath, uri, file_name, download_uri) - - -def download_uri(uri: str, file_name: str): - """downloads the uri either in json-ld or ttl format and puts the result in the DATA_FOLDER - - :param uri: uri to download - :type uri: str - """ - log.info(f"downloading uri {uri}") - # perform request with accept header for json-ld or ttl - headers = {"Accept": "application/ld+json, text/turtle"} - r = requests.get(uri, headers=headers) - - # check if the request was successful and it returned a json-ld or ttl - # file - if r.status_code == 200 and ( - "application/ld+json" in r.headers["Content-Type"] - or "text/turtle" in r.headers["Content-Type"] - ): - # write the file to disk - # TODO: check if the file already exists - # check if the file is json-ld or ttl and 
add the correct extension - if "application/ld+json" in r.headers["Content-Type"]: - filename = DATA_FOLDER + "/" + file_name + ".json" - elif "text/turtle" in r.headers["Content-Type"]: - filename = DATA_FOLDER + "/" + file_name + ".ttl" - with open(filename, "w") as f: - f.write(r.text) - log.info(f"file saved to {filename}") - filename = filename.replace(DATA_FOLDER, "/data") - return filename - else: - log.warning( - f"request for {uri} failed with status code {r.status_code} and content type {r.headers['Content-Type']}" - ) - return None - - -def get_uri_downloaded(file_name: str): - """gets the filename of the uri if it is already downloaded - - :param file_name: filename of the uri - :type file_name: str - :return: filename of the uri if it is already downloaded - :rtype: str - """ - - if os.path.isfile(DATA_FOLDER + "/" + file_name + ".json"): - filename = DATA_FOLDER + "/" + file_name + ".json" - return filename.replace(DATA_FOLDER, "/data") - if os.path.isfile(DATA_FOLDER + "/" + file_name + ".ttl"): - filename = DATA_FOLDER + "/" + file_name + ".ttl" - return filename.replace(DATA_FOLDER, "/data") - return None - - -class DerefUriEntity: - def __init__(self, uri: str, propertypaths: dict): - self.uri = uri - self.propertypathmetadata = None - self.propertypaths = propertypaths - self.file_name = url_2_fname(uri) - self.filename = get_uri_downloaded(self.file_name) - if self.filename is None: - log.info(f"uri {uri} not downloaded yet") - self.filename = download_uri(uri, self.file_name) - - if propertypaths is not None: - run_download_propertypaths( - self.propertypaths, self.uri, self.filename, download_uri - ) diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py deleted file mode 100644 index c7afeac..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/dereference.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -import time -from dotenv import load_dotenv -from datetime import datetime -from SPARQLWrapper import SPARQLWrapper, JSON -import os -import yaml -from pathlib import Path -from .helpers import enable_logging, resolve_path -from .graph import uri_list -from .derefEntity import DerefUriEntity - -log = logging.getLogger(__name__) - - -def config_path_from_config(): - local_default = str(resolve_path("../configs", versus="dotenv")) - folder_name = os.getenv("CONFIG_FILES_FOLDER", local_default) - return Path(folder_name).absolute() - - -class Dereference: - def __init__(self): - pass - - def run_dereference(self): - log.info("running dereference") - - config_folder_path = config_path_from_config() - log.info(f"run_dereference on config files in {config_folder_path}") - # get all the config files in the config folder - # the files should be in yml or yaml format and should start with - # dereference - config_files = [f for f in config_folder_path.glob("dereference*.yml")] - log.info(f"config files found: {config_files}") - - # for each config file , parse the file and get the config - for config_file in config_files: - log.info(f"config file: {config_file}") - with open(config_file, "r") as stream: - try: - config = yaml.safe_load(stream) - log.info(f"config: {config}") - - sparql_query = config["SPARQL"] - uri_list_from_query = uri_list(sparql_query) - - # make a derefEntity for each uri in the - # uri_list_from_query - for uri in uri_list_from_query: - log.info(f"uri: {uri}") - derefEntity = DerefUriEntity( - uri, config["property_paths"]) - 
log.info(f"derefEntity: {derefEntity}") - - except yaml.YAMLError as exc: - log.error(exc) diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py deleted file mode 100644 index b7df1c6..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/graph.py +++ /dev/null @@ -1,162 +0,0 @@ -# this file will contain the functions that will be used to query the -# graph database - -import logging -from SPARQLWrapper import SPARQLWrapper, JSON -from urllib.parse import unquote, quote -from pyrdfj2 import J2RDFSyntaxBuilder -import logging -import os -import re -from .helpers import resolve_path - -# from dotenv import load_dotenv - -log = logging.getLogger(__name__) -URN_BASE = os.getenv("URN_BASE", "urn:lwua:INGEST") - - -def gdb_from_config(): - base = os.getenv("GDB_BASE", "http://localhost:7200") - repoid = os.getenv("GDB_REPO", "lwua23") - - endpoint = f"{ base }/repositories/{ repoid }" - # update statements are handled at other endpoint - updateEndpoint = endpoint + "/statements" - - log.debug(f"using endpoint {endpoint}") - - GDB = SPARQLWrapper( - endpoint=endpoint, - updateEndpoint=updateEndpoint, - returnFormat=JSON, - agent="lwua-python-sparql-client", - ) - GDB.method = "POST" - return GDB - - -GDB = gdb_from_config() - - -def get_j2rdf_builder(): - template_folder = resolve_path("./dereferencer/templates") - log.info(f"template_folder == {template_folder}") - # init J2RDFSyntaxBuilder - j2rdf = J2RDFSyntaxBuilder( - templates_folder=template_folder, - extra_functions={"registry_of_lastmod_context": f"{URN_BASE}:ADMIN"}, - ) - return j2rdf - - -J2RDF = get_j2rdf_builder() - - -def get_registry_of_lastmod(): - log.info(f"getting last modified graph") - - template = "lastmod_info.sparql" - vars = {} - query = J2RDF.build_syntax(template, **vars) - # log.debug(f"get_admin_graph query == {query}") - GDB.setQuery(query) - GDB.setReturnFormat(JSON) - results = GDB.query().convert() - - # convert {'head': {'vars': ['graph', 'lastmod']}, 'results': {'bindings': []}} to [{PosixPath('graph'): lastmod}] - # URI must be substracted from graph context and datetime str must be - # converted to epoch - - converted = {} - return convert_results_registry_of_lastmod(results) - - -def convert_results_registry_of_lastmod(results): - converted = {} - for g in results["results"]["bindings"]: - path = context_2_fname(g["graph"]["value"]) - time = datetime.fromisoformat(g["lastmod"]["value"]) - converted[path] = time - return converted - - -def context_2_fname(context: str): - """ - Convert a context to a filename path. - - :param context: The context to convert. - :type context: str - :return: The filename corresponding to the context. - :rtype: str - """ - assert context.startswith( - URN_BASE), f"Context {context} is not IRI compliant" - return unquote(context[len(URN_BASE) + 1:]) - - -def fname_2_context(fname: str): - """ - Convert a filename to a context. - - :param fname: The filename to convert. - :type fname: str - :return: The context corresponding to the filename. 
- :rtype: str - """ - fname = str(fname) - return f"{URN_BASE}:{quote(fname)}" - - -def uri_list(query): - """ - Return a list of URI's from a query - """ - log.debug(f"uri_list: {query}") - - # Extract the variable from the SELECT clause - select_part = re.search( - "SELECT(.*?)(FROM|WHERE)", - query, - re.IGNORECASE).group(1) - variables = select_part.split() - - # Check that there is exactly one variable in the SELECT part of the - # SPARQL query - if len(variables) != 1: - error_message = f"There should be exactly one variable in the SELECT part of the SPARQL query but found {len(variables)} in {variables}" - log.error(error_message) - raise ValueError(error_message) - - var = variables[0][1:] # remove the ? from the variable - - GDB.setQuery(query) - GDB.setReturnFormat(JSON) - results = GDB.query().convert() - log.debug(f"uri_list: results: {results}") - - # Use the extracted variable when getting the results - return [result[var]["value"] for result in results["results"]["bindings"]] - - -def uri_list_from_deref_property_path(url, filename, propertypath): - """ - Return a list of URI's from a query - """ - log.debug(f"uri_list: {propertypath}") - - template = "deref_property_path.sparql" - - graph = fname_2_context(filename) - - vars = {"graph": graph, "subject": url, "property": propertypath} - - query = J2RDF.build_syntax(template, **vars) - log.debug(f"uri_list_from_deref_property_path query == {query}") - GDB.setQuery(query) - GDB.setReturnFormat(JSON) - results = GDB.query().convert() - log.debug(f"uri_list_from_deref_property_path results: {results}") - - # Use the extracted variable when getting the results - return [result["o"]["value"] for result in results["results"]["bindings"]] diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/helpers.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/helpers.py deleted file mode 100644 index 7c3c720..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/helpers.py +++ /dev/null @@ -1,77 +0,0 @@ -import yaml -import logging -import logging.config -import os -from pathlib import Path -from dotenv import find_dotenv - - -log = logging.getLogger(__name__) - - -def yaml_load_file(file): - if file is None: - log.debug("can not load unspecified yaml file") - return None - # else - try: - with open(file, "r") as yml_file: - return yaml.load(yml_file, Loader=yaml.SafeLoader) - except Exception as e: - log.exception(e) - return dict() - - -def find_logconf(logconf): - if logconf is None or logconf == "": - return None - for vs in ["dotenv", "module", "work"]: # try in this order - logconf_path = resolve_path(logconf, versus=vs) - print(f"trying vs {vs} --> {logconf_path} ?") - if logconf_path.exists(): - return logconf_path - # else - raise Exception( - f"config error logconf file {logconf} not found relative to dotenv, module or pwd" - ) - - -def enable_logging(logconf: str = None): - """Configures logging based on logconf specified through .env ${LOGCONF}""" - logconf = os.getenv("LOGCONF") if logconf is None else logconf - logconf_path = find_logconf(logconf) - if logconf_path is None: - log.info("No logging config found.") - return - # else - logconf = str(logconf_path) - logging.config.dictConfig(yaml_load_file(logconf)) - log.info(f"Logging enabled according to config in {logconf}") - - -def singleton(class_): - """Decorator for singleton classes""" - instances = {} - - def getinstance(*args, **kwargs): - if class_ not in instances: - instances[class_] = class_(*args, **kwargs) - return 
instances[class_] - - return getinstance - - -LOCATIONS: dict[str, Path] = dict( - work=Path().cwd(), - helpers=Path(__file__).parent.absolute(), - module=Path(__file__).parent.parent.absolute(), - dotenv=Path(find_dotenv()).parent, -) - - -def resolve_path(location: str, versus: str = "module"): - location = location if location else "" - assert versus in LOCATIONS, f"no base path available for coded versus = '{versus}'" - base: Path = LOCATIONS[versus] - log.debug(f"resolve path base='{base}' + rel='{location}'") - return Path(base, location).absolute() diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/schedule.py b/docker/lwua-dereferencer/dereferencer-py/dereferencer/schedule.py deleted file mode 100644 index 951a4ee..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/schedule.py +++ /dev/null @@ -1,41 +0,0 @@ -""" Scheduling the regular running functions of the main workflow (ingest / process / store) -""" -import logging -import time -import os -from apscheduler.schedulers.blocking import BlockingScheduler -from .dereference import Dereference - - -log = logging.getLogger(__name__) - - -# https://apscheduler.readthedocs.io/en/3.x/userguide.html -class DereferenceScheduler(BlockingScheduler): - def __init__(self, run_on_start: bool = True): - time_delta = os.getenv("SCHEDULER_PERIOD", "300") - timeprops: dict = dict(seconds=int(time_delta)) - - # get the waittime before starting the scheduler - waittime = os.getenv("SCHEDULER_WAIT", "10") - time.sleep(int(waittime)) - - super().__init__() - self._run_on_start = run_on_start - self.add_job(lambda: self.main_schedule(), "interval", **timeprops) - - def start(self): - try: - log.info("starting dereferencer scheduler") - self.dereferencer = Dereference() - if self._run_on_start: - self.main_schedule() - super().start() - except (KeyboardInterrupt, SystemExit): - log.info("execution interrupted") - except Exception as e: - log.exception(e) - - def main_schedule(self): - log.info("starting main service flow") - self.dereferencer.run_dereference() diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql b/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql deleted file mode 100644 index 98e345a..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/deref_property_path.sparql +++ /dev/null @@ -1,14 +0,0 @@ -{# - This Template is used to make a SPARQL query that will return the objects of a given property - This template takes in 3 parameters: - - The URI of the property - - The URI of the subject - - The URI of the graph -#} - -SELECT ?o -WHERE { - GRAPH <{{ graph }}> { - <{{ subject }}> <{{ property }}> ?o . - } -} \ No newline at end of file diff --git a/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql b/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql deleted file mode 100644 index 62464ed..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/dereferencer/templates/lastmod_info.sparql +++ /dev/null @@ -1,9 +0,0 @@ -{# - This template is used to generate SPARQL SELECT queries. 
- This template takes 1 parameter: - - registry_of_lastmod_context: the context from which the data is to be selected -#} - -SELECT ?graph ?lastmod WHERE { - GRAPH <{{ registry_of_lastmod_context }}> { ?graph ?lastmod } -} \ No newline at end of file diff --git a/docker/lwua-dereferencer/dereferencer-py/logging b/docker/lwua-dereferencer/dereferencer-py/logging deleted file mode 100644 index 42ff418..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/logging +++ /dev/null @@ -1 +0,0 @@ -../../../logging/ \ No newline at end of file diff --git a/docker/lwua-dereferencer/dereferencer-py/poetry.lock b/docker/lwua-dereferencer/dereferencer-py/poetry.lock deleted file mode 100644 index c6fde16..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/poetry.lock +++ /dev/null @@ -1,596 +0,0 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. - -[[package]] -name = "apscheduler" -version = "3.10.4" -description = "In-process task scheduler with Cron-like capabilities" -optional = false -python-versions = ">=3.6" -files = [ - {file = "APScheduler-3.10.4-py3-none-any.whl", hash = "sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661"}, - {file = "APScheduler-3.10.4.tar.gz", hash = "sha256:e6df071b27d9be898e486bc7940a7be50b4af2e9da7c08f0744a96d4bd4cef4a"}, -] - -[package.dependencies] -pytz = "*" -six = ">=1.4.0" -tzlocal = ">=2.0,<3.dev0 || >=4.dev0" - -[package.extras] -doc = ["sphinx", "sphinx-rtd-theme"] -gevent = ["gevent"] -mongodb = ["pymongo (>=3.0)"] -redis = ["redis (>=3.0)"] -rethinkdb = ["rethinkdb (>=2.4.0)"] -sqlalchemy = ["sqlalchemy (>=1.4)"] -testing = ["pytest", "pytest-asyncio", "pytest-cov", "pytest-tornado5"] -tornado = ["tornado (>=4.3)"] -twisted = ["twisted"] -zookeeper = ["kazoo"] - -[[package]] -name = "certifi" -version = "2023.11.17" -description = "Python package for providing Mozilla's CA Bundle." -optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, - {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, -] - -[[package]] -name = "charset-normalizer" -version = "3.3.2" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
-optional = false -python-versions = ">=3.7.0" -files = [ - {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, - {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, - {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, - {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, -] - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "idna" -version = "3.6" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.5" -files = [ - {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, - {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, -] - -[[package]] -name = "iniconfig" -version = "2.0.0" -description = "brain-dead simple config-ini parsing" -optional = false -python-versions = ">=3.7" -files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] - -[[package]] -name = "isodate" -version = "0.6.1" -description = "An ISO 8601 date/time/duration parser and formatter" -optional = false -python-versions = "*" -files = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, -] - -[package.dependencies] -six = "*" - -[[package]] -name = "jinja2" -version = "3.1.2" -description = "A very fast and expressive template engine." -optional = false -python-versions = ">=3.7" -files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "markupsafe" -version = "2.1.3" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = 
"MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = 
"MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, -] - -[[package]] -name = "packaging" -version = "23.2" -description = "Core utilities for Python packages" -optional = false -python-versions = ">=3.7" -files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, -] - -[[package]] -name = "pluggy" -version = "1.3.0" -description = "plugin and hook calling mechanisms for python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, - {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, -] - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["pytest", "pytest-benchmark"] - -[[package]] -name = "pyaml" -version = "23.9.7" -description = "PyYAML-based module to produce a bit more pretty and readable YAML-serialized data" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pyaml-23.9.7-py3-none-any.whl", hash = 
"sha256:fdb4c111b676d2381d1aa88c378fcde46c167575dfd688e656977a77075b692c"}, - {file = "pyaml-23.9.7.tar.gz", hash = "sha256:581ea4e99f0e308864407e04c03c609241aefa3a15dfba8964da7644baf3b217"}, -] - -[package.dependencies] -PyYAML = "*" - -[package.extras] -anchors = ["unidecode"] - -[[package]] -name = "pyparsing" -version = "3.1.1" -description = "pyparsing module - Classes and methods to define and execute parsing grammars" -optional = false -python-versions = ">=3.6.8" -files = [ - {file = "pyparsing-3.1.1-py3-none-any.whl", hash = "sha256:32c7c0b711493c72ff18a981d24f28aaf9c1fb7ed5e9667c9e84e3db623bdbfb"}, - {file = "pyparsing-3.1.1.tar.gz", hash = "sha256:ede28a1a32462f5a9705e07aea48001a08f7cf81a021585011deba701581a0db"}, -] - -[package.extras] -diagrams = ["jinja2", "railroad-diagrams"] - -[[package]] -name = "pyrdfj2" -version = "0.0.5" -description = "Python wrapper on jinja SPARQL templating" -optional = false -python-versions = ">=3.8.1,<4.0" -files = [ - {file = "pyrdfj2-0.0.5-py3-none-any.whl", hash = "sha256:fa8dabb66668345d6da64a58e9cead75b02090abd3727a9577140db10b00d6ce"}, - {file = "pyrdfj2-0.0.5.tar.gz", hash = "sha256:6d840015f7d493313c7fa432a5bef924da5cb9c5d76c39bd237caa36fcc1476a"}, -] - -[package.dependencies] -jinja2 = "*" -python-dateutil = "*" -uritemplate = "*" - -[[package]] -name = "pytest" -version = "7.4.3" -description = "pytest: simple powerful testing with Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, - {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "sys_platform == \"win32\""} -iniconfig = "*" -packaging = "*" -pluggy = ">=0.12,<2.0" - -[package.extras] -testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] - -[[package]] -name = "python-dateutil" -version = "2.8.2" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "python-dotenv" -version = "1.0.0" -description = "Read key-value pairs from a .env file and set them as environment variables" -optional = false -python-versions = ">=3.8" -files = [ - {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, - {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, -] - -[package.extras] -cli = ["click (>=5.0)"] - -[[package]] -name = "pytz" -version = "2023.3.post1" -description = "World timezone definitions, modern and historical" -optional = false -python-versions = "*" -files = [ - {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, - {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, -] - -[[package]] -name = "pyyaml" -version = "6.0.1" 
-description = "YAML parser and emitter for Python" -optional = false -python-versions = ">=3.6" -files = [ - {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, - {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, - {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, - {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, - {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, - {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, - {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, - {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, - {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, - {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, - {file = 
"PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, - {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, - {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, - {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, - {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, - {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, - {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, - {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, - {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, - {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, - {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, - {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", 
hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, - {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, - {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, - {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, -] - -[[package]] -name = "rdflib" -version = "7.0.0" -description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." -optional = false -python-versions = ">=3.8.1,<4.0.0" -files = [ - {file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"}, - {file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"}, -] - -[package.dependencies] -isodate = ">=0.6.0,<0.7.0" -pyparsing = ">=2.1.0,<4" - -[package.extras] -berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] -html = ["html5lib (>=1.0,<2.0)"] -lxml = ["lxml (>=4.3.0,<5.0.0)"] -networkx = ["networkx (>=2.0.0,<3.0.0)"] - -[[package]] -name = "requests" -version = "2.31.0" -description = "Python HTTP for Humans." -optional = false -python-versions = ">=3.7" -files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "six" -version = "1.16.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] - -[[package]] -name = "sparqlwrapper" -version = "2.0.0" -description = "SPARQL Endpoint interface to Python" -optional = false -python-versions = ">=3.7" -files = [ - {file = "SPARQLWrapper-2.0.0-py3-none-any.whl", hash = "sha256:c99a7204fff676ee28e6acef327dc1ff8451c6f7217dcd8d49e8872f324a8a20"}, - {file = "SPARQLWrapper-2.0.0.tar.gz", hash = "sha256:3fed3ebcc77617a4a74d2644b86fd88e0f32e7f7003ac7b2b334c026201731f1"}, -] - -[package.dependencies] -rdflib = ">=6.1.1" - -[package.extras] -dev = ["mypy (>=0.931)", "pandas (>=1.3.5)", "pandas-stubs (>=1.2.0.48)", "setuptools (>=3.7.1)"] -docs = ["sphinx (<5)", "sphinx-rtd-theme"] -keepalive = ["keepalive (>=0.5)"] -pandas = ["pandas (>=1.3.5)"] - -[[package]] -name = "tzdata" -version = "2023.3" -description = "Provider of IANA time zone data" -optional = false -python-versions = ">=2" -files = [ - {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, - {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, -] - -[[package]] -name = "tzlocal" -version = "5.2" -description = "tzinfo object for the local timezone" -optional = false -python-versions = ">=3.8" -files = [ - {file = 
"tzlocal-5.2-py3-none-any.whl", hash = "sha256:49816ef2fe65ea8ac19d19aa7a1ae0551c834303d5014c6d5a62e4cbda8047b8"}, - {file = "tzlocal-5.2.tar.gz", hash = "sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e"}, -] - -[package.dependencies] -tzdata = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] - -[[package]] -name = "uritemplate" -version = "4.1.1" -description = "Implementation of RFC 6570 URI Templates" -optional = false -python-versions = ">=3.6" -files = [ - {file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"}, - {file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"}, -] - -[[package]] -name = "urllib3" -version = "2.1.0" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = ">=3.8" -files = [ - {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"}, - {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[metadata] -lock-version = "2.0" -python-versions = "^3.11" -content-hash = "2dca960013881b31821cd303cd54f00e36cdb86e49bed187c5acfab72556900a" diff --git a/docker/lwua-dereferencer/dereferencer-py/pyproject.toml b/docker/lwua-dereferencer/dereferencer-py/pyproject.toml deleted file mode 100644 index 1e042db..0000000 --- a/docker/lwua-dereferencer/dereferencer-py/pyproject.toml +++ /dev/null @@ -1,24 +0,0 @@ -[tool.poetry] -name = "dereferencer-py" -version = "0.1.0" -description = "Dereferencing tool that will add triples to a knowledge graph given a config containing a SPARQL query and a list of property paths are given." -authors = ["cedricdcc <30471340+cedricdcc@users.noreply.github.com>"] -license = "CC0" -readme = "README.md" - -[tool.poetry.dependencies] -python = "^3.11" -apscheduler = "^3.10.4" -pyaml = "^23.9.7" -python-dotenv = "^1.0.0" -sparqlwrapper = "^2.0.0" -pyrdfj2 = "^0.0.5" -requests = "^2.31.0" - - -[tool.poetry.group.dev.dependencies] -pytest = "^7.4.3" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" From 5e939aae96dddba3632ec88827b6a2a6409534a0 Mon Sep 17 00:00:00 2001 From: cedricd Date: Wed, 17 Jan 2024 11:09:04 +0100 Subject: [PATCH 60/60] Update linting-python-files.yml --- .github/workflows/linting-python-files.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/linting-python-files.yml b/.github/workflows/linting-python-files.yml index 9f3cffb..90e0235 100644 --- a/.github/workflows/linting-python-files.yml +++ b/.github/workflows/linting-python-files.yml @@ -1,11 +1,8 @@ name: Python Linting on: - push: - paths: - - 'docker/lwua-ingest/**/*.py' - - 'docker/lwua-dereferencer/**/*.py' pull_request: + types: [closed] paths: - 'docker/lwua-ingest/**/*.py' - 'docker/lwua-dereferencer/**/*.py'