Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subset items tool #42

Draft
wants to merge 31 commits into
base: ref/linkml
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
87f7a6a
add print for testing
yibeichan Jan 25, 2024
3162782
update clear_header
yibeichan Jan 25, 2024
64521d9
remove print
yibeichan Jan 25, 2024
4c1081a
fix order and other errors
yibeichan Feb 7, 2024
50fe4ce
change ui yesno to radio
yibeichan Feb 7, 2024
4359791
fix typo
yibeichan Feb 7, 2024
b17b5c7
update context, field->item, fix isVis
yibeichan Feb 22, 2024
ca3162d
remove useless due to failed validation
yibeichan Feb 23, 2024
4288f8a
remove visibility at the item level & remove matrixInfo
yibeichan Feb 23, 2024
57ca52e
fix choice
yibeichan Feb 25, 2024
82e2300
remove identifier
yibeichan Feb 28, 2024
c6cabf5
updating validate command to the new pydantic model
djarecka Apr 5, 2024
ad8a82c
updating/fixing the tests; updating the model to use CreativeWork; ch…
djarecka Apr 18, 2024
3c7049f
fix conversion tests
yibeichan Apr 21, 2024
a60612f
remove test output
yibeichan Apr 21, 2024
e1e847d
change test output directory
yibeichan Apr 21, 2024
51d30b7
final improvements on tests
yibeichan Apr 21, 2024
ab7c051
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 21, 2024
0543e41
model version after adding Thing class
djarecka Apr 27, 2024
105489b
Merge pull request #37 from yibeichan/master
yibeichan Apr 29, 2024
5c0dff1
Merge branch 'master' into ref/linkml
djarecka May 2, 2024
36bbb36
updating model after removing CreativeWork and ImageUrl
djarecka May 9, 2024
2f3e3ca
adding tests to initialize the model classes
djarecka May 9, 2024
2e54331
fixing load_file; adding write_obj_jsonld function and expanding test…
djarecka May 10, 2024
71e90f0
changing redcap2reproschema to use new pydantic classes; some small c…
djarecka May 13, 2024
ecc93b7
Merge remote-tracking branch 'remotes/djarecka/ref/linkml' into ref/l…
ibevers May 13, 2024
e570714
Start function for loading activities for easy editing
ibevers May 15, 2024
65590c0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 15, 2024
03b86cb
Update provisional subsetting items code
ibevers May 16, 2024
5b2ff4d
Resolve merge conflict from origin
ibevers May 16, 2024
5e80b5e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions reproschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ def main(log_level):


@main.command()
@click.option("--shapefile", default=None, type=click.Path(exists=True, dir_okay=False))
@click.argument("path", nargs=1, type=str)
def validate(shapefile, path):
def validate(path):
if not (path.startswith("http") or os.path.exists(path)):
raise ValueError(f"{path} must be a URL or an existing file or directory")
from .validate import validate

validate(shapefile, path)
result = validate(path)
if result:
click.echo("Validation successful")


@main.command()
Expand Down
218 changes: 171 additions & 47 deletions reproschema/jsonldutils.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,201 @@
from pyld import jsonld
from pyshacl import validate as shacl_validate
import json
import os
from .utils import start_server, stop_server, lgr
from pathlib import Path
from copy import deepcopy
from urllib.parse import urlparse
from .utils import start_server, stop_server, lgr, fixing_old_schema
from .models import Item, Activity, Protocol, ResponseOption, ResponseActivity, Response


def _is_url(path):
    """Determine whether the given path is a URL.

    Parameters
    ----------
    path : str
        Candidate location; it is parsed with ``urllib.parse.urlparse``.

    Returns
    -------
    bool
        True when the parsed scheme is one of ``http``, ``https``, ``ftp``
        or ``ftps``; False otherwise.
    """
    # urlparse lower-cases the scheme, so "HTTP://..." is accepted as well.
    return urlparse(path).scheme in ("http", "https", "ftp", "ftps")


def _is_file(path):
    """Determine whether the given path is a valid file path.

    Parameters
    ----------
    path : str
        Filesystem path to check.

    Returns
    -------
    bool
        Result of ``os.path.isfile(path)``.
    """
    return os.path.isfile(path)


def load_file(path_or_url, started=False, http_kwargs={}):
try:
"""Load a file or URL and return the expanded JSON-LD data."""
path_or_url = str(path_or_url)
if _is_url(path_or_url):
data = jsonld.expand(path_or_url)
if len(data) == 1:
if "@id" not in data[0]:
if "@id" not in data[0] and "id" not in data[0]:
data[0]["@id"] = path_or_url
except jsonld.JsonLdError as e:
if 'only "http" and "https"' in str(e):
lgr.debug("Reloading with local server")
root = os.path.dirname(path_or_url)
if not started:
stop, port = start_server(**http_kwargs)
else:
if "port" not in http_kwargs:
raise KeyError("port key missing in http_kwargs")
port = http_kwargs["port"]
base_url = f"http://localhost:{port}/"
if root:
base_url += f"{root}/"
with open(path_or_url) as json_file:
data = json.load(json_file)
try:
data = jsonld.expand(data, options={"base": base_url})
except:
raise
finally:
if not started:
stop_server(stop)
if len(data) == 1:
if "@id" not in data[0]:
data[0]["@id"] = base_url + os.path.basename(path_or_url)
elif _is_file(path_or_url):
lgr.debug("Reloading with local server")
root = os.path.dirname(path_or_url)
if not started:
stop, port = start_server(**http_kwargs)
else:
if "port" not in http_kwargs:
raise KeyError("port key missing in http_kwargs")
port = http_kwargs["port"]
base_url = f"http://localhost:{port}/"
if root:
base_url += f"{root}/"
with open(path_or_url) as json_file:
data = json.load(json_file)
try:
data = jsonld.expand(data, options={"base": base_url})
except:
raise
finally:
if not started:
stop_server(stop)
if len(data) == 1:
if "@id" not in data[0] and "id" not in data[0]:
data[0]["@id"] = base_url + os.path.basename(path_or_url)
else:
raise Exception(f"{path_or_url} is not a valid URL or file path")
return data


def validate_data(data, shape_file_path):
"""Validate an expanded jsonld document against a shape.
# def load_directory(path_or_url, load_file=load_file):
# """Creates a dictionary mirroring a directory containing only directories and
# JSON-LD files at the specified path."""

"""
#start the server

#stop the server
Base URL
directory = {

}

"""
# loaded_directory = {}

# directory_structure = {}

# for root, dirs, files in os.walk(base_path):
# relative_root = os.path.relpath(root, base_path)
# if relative_root == '.':
# relative_root = ''

# subdirs = {}
# for subdir in dirs:
# subdir_path = os.path.join(root, subdir)
# subdirs[subdir] = load_directory_structure(subdir_path, load_jsonld_function)

# jsonld_files = {}
# for file in files:
# if file.endswith('.jsonld'):
# file_path = os.path.join(root, file)
# jsonld_files[file] = load_jsonld_function(file_path)

# if relative_root:
# directory_structure[relative_root] = {'subdirs': subdirs, 'jsonld_files': jsonld_files}
# else:
# directory_structure.update(subdirs)
# directory_structure.update(jsonld_files)


# return directory_structure
# def load_directory_structure(base_path, started=False, http_kwargs={}):
# """
# Recursively iterates over a directory structure and constructs a dictionary.

# Args:
# - base_path (str): The base directory path to start iterating from.
# - load_jsonld_function (function): A function that takes a file path and returns the loaded JSON-LD data.

# Returns:
# - dict: A dictionary with directory names as keys and subdirectory names or loaded JSON-LD as values.
# """

# if not started:
# stop_server(stop)
# stop, port = start_server(**http_kwargs)
# started = True

# directory_structure = {}

# for root, dirs, files in os.walk(base_path):
# relative_root = os.path.relpath(root, base_path)
# if relative_root == '.':
# relative_root = ''


# subdirs = {}
# for subdir in dirs:
# subdir_path = os.path.join(root, subdir)
# subdirs[subdir] = load_directory_structure(subdir_path)

# jsonld_files = {}
# for file in files:
# file_path = os.path.join(root, file)
# jsonld_files[file] = load_file(file_path, started=True, http_kwargs={"port":port})

# if relative_root:
# directory_structure[relative_root] = {'subdirs': subdirs, 'jsonld_files': jsonld_files}
# else:
# directory_structure.update(subdirs)
# directory_structure.update(jsonld_files)


# stop_server(stop)

# return directory_structure


def validate_data(data):
"""Validate an expanded jsonld document against the pydantic model.

Parameters
----------
data : dict
Python dictionary containing JSONLD object
shape_file_path : str
SHACL file for the document

Returns
-------
conforms: bool
Whether the document is conformant with the shape
v_text: str
Validation information returned by PySHACL
Validation errors if any returned by pydantic

"""
kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
normalized = jsonld.normalize(data, kwargs)
data_file_format = "nquads"
shape_file_format = "turtle"
conforms, v_graph, v_text = shacl_validate(
normalized,
shacl_graph=shape_file_path,
data_graph_format=data_file_format,
shacl_graph_format=shape_file_format,
inference="rdfs",
debug=False,
serialize_report_graph=True,
)
# do we need it?
# kwargs = {"algorithm": "URDNA2015", "format": "application/n-quads"}
# normalized = jsonld.normalize(data, kwargs)
if data[0]["@type"][0] == "http://schema.repronim.org/Field":
obj_type = Item
elif data[0]["@type"][0] == "http://schema.repronim.org/ResponseOption":
obj_type = ResponseOption
elif data[0]["@type"][0] == "http://schema.repronim.org/Activity":
obj_type = Activity
elif data[0]["@type"][0] == "http://schema.repronim.org/Protocol":
obj_type = Protocol
elif data[0]["@type"][0] == "http://schema.repronim.org/ResponseActivity":
obj_type = ResponseActivity
elif data[0]["@type"][0] == "http://schema.repronim.org/Response":
obj_type = Response
else:
raise ValueError("Unknown type")
data_fixed = [fixing_old_schema(data[0], copy_data=True)]
# TODO: where should we load the context from?
contexfile = Path(__file__).resolve().parent / "models/reproschema"
with open(contexfile) as fp:
context = json.load(fp)
data_fixed_comp = jsonld.compact(data_fixed, context)
del data_fixed_comp["@context"]
conforms = False
v_text = ""
try:
obj_type(**data_fixed_comp)
conforms = True
except Exception as e:
v_text = str(e)
return conforms, v_text


Expand Down
5 changes: 2 additions & 3 deletions reproschema/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from .protocol import Protocol
from .activity import Activity
from .item import Item
from .model import Activity, Item, Protocol, ResponseOption, ResponseActivity, Response
from .utils import load_schema, write_obj_jsonld
66 changes: 0 additions & 66 deletions reproschema/models/activity.py

This file was deleted.

Loading