From f74041559573feb08324b923d101606602ca7b83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Thu, 3 Oct 2024 16:16:07 +0200 Subject: [PATCH] Safer URNs for all parsers --- bids_prov/afni/afni_parser.py | 14 +++++++++++--- bids_prov/fsl/fsl_parser.py | 14 +++++++++++--- bids_prov/spm/spm_parser.py | 5 ++--- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/bids_prov/afni/afni_parser.py b/bids_prov/afni/afni_parser.py index 7ed5cce2..3fb19e9d 100644 --- a/bids_prov/afni/afni_parser.py +++ b/bids_prov/afni/afni_parser.py @@ -9,7 +9,7 @@ from bids_prov.fsl.fsl_parser import get_entities from bids_prov.utils import ( get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity, - get_activity_urn, get_agent_urn, get_entity_urn, + get_activity_urn, get_agent_urn, get_entity_urn, make_alnum, get_uuid, writing_jsonld ) @@ -206,7 +206,11 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False): } for input_path in inputs: - input_id = get_entity_urn(input_path) + # Deal with not human readable paths + if not make_alnum(input_path): + input_id = 'urn:uuid:' + get_uuid() + else: + input_id = get_entity_urn(input_path) existing_input = next( (entity for entity in records["Entities"] if entity["AtLocation"] == input_path), None) @@ -227,9 +231,13 @@ def build_records(commands_block: list, agent_id: str, verbose: bool = False): activity["Used"] = sorted(set(activity["Used"])) for output_path in outputs: + if not make_alnum(output_path): + output_id = 'urn:uuid:' + get_uuid() + else: + output_id = get_entity_urn(output_path) records["Entities"].append( { - "@id": get_entity_urn(output_path), + "@id": output_id, "Label": os.path.split(output_path)[1], "AtLocation": output_path, "GeneratedBy": activity["@id"], diff --git a/bids_prov/fsl/fsl_parser.py b/bids_prov/fsl/fsl_parser.py index 9e4eaab6..c4d0b344 100644 --- a/bids_prov/fsl/fsl_parser.py +++ b/bids_prov/fsl/fsl_parser.py @@ -10,7 +10,7 @@ from bids_prov.utils import ( get_default_graph, CONTEXT_URL, label_mapping, compute_sha_256_entity, - get_activity_urn, get_agent_urn, get_entity_urn, + get_activity_urn, get_agent_urn, get_entity_urn, make_alnum, get_uuid, writing_jsonld ) @@ -502,7 +502,10 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str, verbose: bool for input_path in inputs: # input_name = input_path.replace("/", "_") # TODO - input_id = get_entity_urn(input_path) + if not make_alnum(input_path): + input_id = 'urn:uuid:' + get_uuid() + else: + input_id = get_entity_urn(input_path) existing_input = next( (e for e in records["Entities"] if e["AtLocation"] == input_path), None) @@ -522,9 +525,14 @@ def build_records(groups: Mapping[str, List[str]], agent_id: str, verbose: bool for output_path in outputs: # output_name = output_path.replace("/", "_") # TODO + if not make_alnum(output_path): + output_id = 'urn:uuid:' + get_uuid() + else: + output_id = get_entity_urn(output_path) + records["Entities"].append( { - "@id": get_entity_urn(output_path), + "@id": output_id, "Label": os.path.split(output_path)[1], "AtLocation": output_path, "GeneratedBy": activity["@id"], diff --git a/bids_prov/spm/spm_parser.py b/bids_prov/spm/spm_parser.py index 1bc5b478..06bb4491 100644 --- a/bids_prov/spm/spm_parser.py +++ b/bids_prov/spm/spm_parser.py @@ -65,12 +65,11 @@ def get_input_entity(right: str) -> List[dict]: entity_label_short = "_".join(file_location.split("/")[-2:]) # Sub01_con_0001.nii entity = { "@id": get_entity_urn( - "/"+"/".join(file_location.split("/")[1:]), - file_location.split("/")[0]), + "/"+"/".join(file_location.strip("/").split("/")[1:]), + file_location.strip("/").split("/")[0]), "Label": label_mapping(entity_label_short, "spm/spm_activity_labels.json"), "AtLocation": file_location } - entities.append(entity) return entities