Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(regex): Construct backreferences to ensure entity consistency #1889

Merged
merged 2 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions tools/schemacode/bidsschematools/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ def _format_entity(entity, name, pattern, level, directory=False):
if directory and entity not in DIR_ENTITIES:
return ""

# For a directory entity appearing in a filename, match the value
# found in the directory name
# Note that this makes it impossible to do something like
# /ses-1_dwi.json instead of /sub-??/ses-1/sub-??_ses-1_dwi.json
# We'll assume this is a negligible use case.
# It is possible to create such a regular expression, but doing it
# in a piecewise fashion is difficult.
if not directory and entity in DIR_ENTITIES:
return rf"(?({entity}){name}-(?P={entity})_)"

label = _capture_regex(entity, pattern, not directory and entity in DIR_ENTITIES)
post = "/" if directory else "_"

Expand Down
37 changes: 31 additions & 6 deletions tools/schemacode/bidsschematools/tests/test_rules.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from bidsschematools import rules

from ..types import Namespace
Expand All @@ -13,19 +15,29 @@ def test_entity_rule(schema_obj):
"extensions": [".nii"],
}
)
assert rules._entity_rule(rule, schema_obj) == {
nii_rule = rules._entity_rule(rule, schema_obj)
assert nii_rule == {
"regex": (
r"sub-(?P<subject>[0-9a-zA-Z]+)/"
r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
r"(?P<datatype>anat)/"
r"sub-(?P=subject)_"
r"(?:ses-(?P=session)_)?"
r"(?(subject)sub-(?P=subject)_)"
r"(?(session)ses-(?P=session)_)"
r"(?P<suffix>T1w)"
r"(?P<extension>\.nii)\Z"
),
"mandatory": False,
}

assert re.match(nii_rule["regex"], "sub-01/anat/sub-01_T1w.nii")
assert re.match(nii_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/anat/sub-02_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/sub-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/ses-01/anat/sub-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/anat/sub-01_ses-01_T1w.nii")
assert not re.match(nii_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-02_T1w.nii")

# Sidecar entities are optional
rule = Namespace.build(
{
Expand All @@ -35,18 +47,31 @@ def test_entity_rule(schema_obj):
"extensions": [".json"],
}
)
assert rules._entity_rule(rule, schema_obj) == {
json_rule = rules._entity_rule(rule, schema_obj)
assert json_rule == {
"regex": (
r"(?:sub-(?P<subject>[0-9a-zA-Z]+)/)?"
r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
r"(?:(?P<datatype>anat)/)?"
r"(?:sub-(?P=subject)_)?"
r"(?:ses-(?P=session)_)?"
r"(?(subject)sub-(?P=subject)_)"
r"(?(session)ses-(?P=session)_)"
r"(?P<suffix>T1w)"
r"(?P<extension>\.json)\Z"
),
"mandatory": False,
}
assert re.match(json_rule["regex"], "sub-01/anat/sub-01_T1w.json")
assert re.match(json_rule["regex"], "sub-01/sub-01_T1w.json")
assert re.match(json_rule["regex"], "T1w.json")
assert re.match(json_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-01_T1w.json")
assert re.match(json_rule["regex"], "sub-01/ses-01/sub-01_ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/anat/sub-02_T1w.json")
assert not re.match(json_rule["regex"], "sub-01_T1w.json")
assert not re.match(json_rule["regex"], "ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/ses-01/anat/sub-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/anat/sub-01_ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/ses-01/ses-01_T1w.json")
assert not re.match(json_rule["regex"], "sub-01/ses-01/anat/sub-01_ses-02_T1w.json")


def test_split_inheritance_rules():
Expand Down
36 changes: 36 additions & 0 deletions tools/schemacode/bidsschematools/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,42 @@ def test_inheritance_examples():
assert result["path_tracking"] == incorrect_inheritance


def test_regression_examples():
"""Tests that failed on pybids when switching to bst-regex-based validation"""
examples = [
"/sub-01/ses-ses/sub-01_dwi.bval", # redundant dir /ses-ses/
"/sub-01/01_dwi.bvec", # missed subject suffix
"/sub-01/sub_dwi.json", # missed subject id
"/sub-01/sub-01_23_run-01_dwi.bval", # wrong _23_
"/sub-01/sub-01_run-01_dwi.vec", # wrong extension
"/sub-01/sub-01_run-01_dwi.jsn", # wrong extension
"/sub-01/sub-01_acq_dwi.bval", # missed suffix value
"/sub-01/sub-01_acq-23-singleband_dwi.bvec", # redundant -23-
"/sub-01/anat/sub-01_acq-singleband_dwi.json", # redundant /anat/
"/sub-01/sub-01_recrod-record_acq-singleband_run-01_dwi.bval", # redundant record-record_
"/sub_01/sub-01_acq-singleband_run-01_dwi.bvec", # wrong /sub_01/
"/sub-01/sub-01_acq-singleband__run-01_dwi.json", # wrong __
"/sub-01/ses-test/sub-01_ses_test_dwi.bval", # wrong ses_test
"/sub-01/ses-test/sb-01_ses-test_dwi.bvec", # wrong sb-01
"/sub-01/ses-test/sub-01_ses-test_dw.json", # wrong modality
"/sub-01/ses-test/sub-01_ses-test_run-01_dwi.val", # wrong extension
"/sub-01/ses-test/sub-01_run-01_dwi.bvec", # missed session in the filename
# This validator adds a .*/ to the regex, so this will be a false negative
# If I cared about this validator, I would dig into it, but it doesn't seem worth it.
# -cjm 2024.08.14
# "/sub-01/ses-test/ses-test_run-01_dwi.json", # missed subject in the filename
"/sub-01/ses-test/sub-01_ses-test_acq-singleband.bval", # missed modality
"/sub-01/ses-test/sub-01_ses-test_acq-singleband_dwi", # missed extension
"/ses-test/sub-01/sub-01_ses-test_acq-singleband_dwi.json", # wrong dirs order
"/sub-01/ses-test/sub-02_ses-test_acq-singleband_run-01_dwi.bval", # wrong sub id
"/sub-01/sub-01_ses-test_acq-singleband_run-01_dwi.bvec", # ses dir missed
"/ses-test/sub-01_ses-test_acq-singleband_run-01_dwi.json", # sub id dir missed
]

result = validate_bids(examples, dummy_paths=True)
assert result["path_tracking"] == examples


def test_write_report(tmp_path):
from bidsschematools.validator import write_report

Expand Down