Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph comparison ignoring URNs #136

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 44 additions & 3 deletions bids_prov/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import shutil
import uuid
from typing import Mapping, Union, Tuple
import re

CONTEXT_URL = "https://raw.githubusercontent.com/bids-standard/BEP028_BIDSprov/master/context.json"

Expand Down Expand Up @@ -120,6 +121,44 @@ def compute_sha_256_entity(entities: dict):
shutil.rmtree(directory)


def simplify_urns(graph: str) -> str:
    """
    Replace URNs of a json-ld graph with simpler (not random) values defined by their order of appearance in the graph.

    A URN is any occurrence of "urn:" followed by 36 characters among letters,
    digits and "-". The first distinct URN becomes "urn:" followed by 36 zeros,
    the second "urn:" followed by 35 zeros and a "1", and so on. Every
    occurrence of the same URN receives the same simplified value.

    Parameters
    ----------
    graph : str
        The json-ld graph dumped as a string

    Returns
    -------
    str
        The input string with simplified urns.
    """

    # Maps each urn as present in the original graph to the simplified id
    # given by its order of first appearance.
    known_urns = {}

    def _simplified(match):
        """Return the simplified id for a matched urn, assigning one if new."""
        urn = match.group()
        if urn not in known_urns:
            known_urns[urn] = f"urn:{str(len(known_urns)).zfill(36)}"
        return known_urns[urn]

    # Substitute in a single left-to-right pass. Chaining str.replace calls
    # would let a later replacement rewrite the output of an earlier one when
    # the graph already contains urns shaped like the simplified ids, merging
    # distinct urns into one.
    return re.sub(r'urn:[a-zA-Z0-9\-]{36}', _simplified, graph)


def writing_jsonld(graph, indent, output_file):
"""
Write a json-ld in memory unless it already exists and contains the same content
Expand All @@ -140,10 +179,12 @@ def writing_jsonld(graph, indent, output_file):
"""
if os.path.isfile(output_file):
with open(output_file, "r") as f:
existing_content = f.read()
existing_content = simplify_urns(f.read())

new_content = simplify_urns(json.dumps(graph, indent=indent))

if existing_content == json.dumps(graph, indent=indent):
return True
if existing_content == new_content:
return True

with open(output_file, "w") as fd:
json.dump(graph, fd, indent=indent)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Date : 2024_05_23_08h10m05s
Date : 2024_06_12_12h43m15s
Processing files...
file= nidmresults-examples/afni_alt_onesided_proc.sub_001
file= nidmresults-examples/afni_alt_onesided_proc.sub_001
Expand Down Expand Up @@ -69,4 +69,4 @@ Processing files...
file= nidmresults-examples/spm_thr_voxelfdrp05_batch.m
file= nidmresults-examples/spm_thr_voxelfwep05_batch.m
file= nidmresults-examples/spm_thr_voxelunct4_batch.m
End of processed files. Results in dir : 'examples/from_parsers'. Time required: 0:00:01.882820
End of processed files. Results in dir : 'examples/from_parsers'. Time required: 0:00:02.104508