Skip to content

Commit

Permalink
RF: Refactor validator building of filename regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
effigies committed Sep 30, 2022
1 parent 8106dbd commit 098c3f2
Show file tree
Hide file tree
Showing 2 changed files with 253 additions and 532 deletions.
321 changes: 83 additions & 238 deletions tools/schemacode/bidsschematools/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,266 +3,111 @@

import pytest

from .. import validator
from ..types import Namespace
from .conftest import BIDS_ERROR_SELECTION, BIDS_SELECTION


def test__add_entity():
from bidsschematools.validator import _add_entity
def test_path_rule():
rule = Namespace.build({"path": "dataset_description.json", "level": "required"})
assert validator._path_rule(rule) == {"regex": r"dataset_description\.json", "mandatory": True}

# Test empty input and directory creation and required entity
regex_entities = ""
entity = "subject"
entity_shorthand = "sub"
variable_field = "[0-9a-zA-Z]+"
requirement_level = "required"
rule = Namespace.build({"path": "LICENSE", "level": "optional"})
assert validator._path_rule(rule) == {"regex": "LICENSE", "mandatory": False}

_regex_entities = _add_entity(
regex_entities,
entity,
entity_shorthand,
variable_field,
requirement_level,
)

assert _regex_entities == "sub-(?P=subject)"

# Test append input and optional entity
regex_entities = (
"sub-(?P=subject)(|_ses-(?P=session))"
"(|_task-(?P<task>[0-9a-zA-Z]+))(|_trc-(?P<tracer>[0-9a-zA-Z]+))"
"(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
"(|_run-(?P<run>[0-9a-zA-Z]+))"
)
entity = "recording"
entity_shorthand = "recording"
variable_field = "[0-9a-zA-Z]+"
requirement_level = "optional"

_regex_entities = _add_entity(
regex_entities,
entity,
entity_shorthand,
variable_field,
requirement_level,
)

assert (
_regex_entities == "sub-(?P=subject)(|_ses-(?P=session))"
"(|_task-(?P<task>[0-9a-zA-Z]+))(|_trc-(?P<tracer>[0-9a-zA-Z]+))"
"(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
"(|_run-(?P<run>[0-9a-zA-Z]+))"
"(|_recording-(?P<recording>[0-9a-zA-Z]+))"
)


def test__add_extensions():
from bidsschematools.validator import _add_extensions

# Test single extension
regex_string = (
"sub-(?P=subject)(|_ses-(?P=session))"
"_sample-(?P<sample>[0-9a-zA-Z]+)"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))_photo"
)
variant = {
"suffixes": ["photo"],
"extensions": [".jpg"],
"entities": {
"subject": "required",
"session": "optional",
"sample": "required",
"acquisition": "optional",
},
def test_stem_rule():
rule = Namespace.build({"stem": "README", "level": "required", "extensions": ["", ".md"]})
assert validator._stem_rule(rule) == {
"regex": r"README(?P<extension>|\.md)",
"mandatory": True,
}
_regex_string = _add_extensions(regex_string, variant)

assert (
_regex_string == "sub-(?P=subject)(|_ses-(?P=session))"
"_sample-(?P<sample>[0-9a-zA-Z]+)"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))_photo\\.jpg"
rule = Namespace.build(
{"stem": "participants", "level": "optional", "extensions": [".tsv", ".json"]}
)

# Test multiple extensions
regex_string = (
"sub-(?P=subject)(|_ses-(?P=session))"
"_sample-(?P<sample>[0-9a-zA-Z]+)"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))_photo"
)
variant = {
"suffixes": ["photo"],
"extensions": [".jpg", ".png", ".tif"],
"entities": {
"subject": "required",
"session": "optional",
"sample": "required",
"acquisition": "optional",
},
assert validator._stem_rule(rule) == {
"regex": r"participants(?P<extension>\.tsv|\.json)",
"mandatory": False,
}
_regex_string = _add_extensions(regex_string, variant)

assert (
_regex_string == "sub-(?P=subject)(|_ses-(?P=session))"
"_sample-(?P<sample>[0-9a-zA-Z]+)"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
"_photo(\\.jpg|\\.png|\\.tif)"
)


def test__add_subdirs():
from bidsschematools.validator import _add_subdirs

regex_string = "sub-(?P=subject)_sessions\\.(tsv|json)"
variant = {
"suffixes": ["sessions"],
"extensions": [".tsv", ".json"],
"entities": {"subject": "required"},
}
datatype = "tabular_metadata"
entity_definitions = {
"acquisition": {
"display_name": "Acquisition",
"name": "acq",
"type": "string",
"format": "label",
},
"session": {
"display_name": "Session",
"name": "ses",
"type": "string",
"format": "label",
},
"subject": {
"display_name": "Subject",
"name": "sub",
"type": "string",
"format": "label",
},
}
formats = {
"label": {
"pattern": "[0-9a-zA-Z]+",
def test_entity_rule(schema_obj):
# Simple
rule = Namespace.build(
{
"datatypes": ["anat"],
"entities": {"subject": "required", "session": "optional"},
"suffixes": ["T1w"],
"extensions": [".nii"],
}
}
modality_datatypes = [
"anat",
"dwi",
"fmap",
"func",
"perf",
"eeg",
"ieeg",
"meg",
"beh",
"pet",
"micr",
]
_regex_string = _add_subdirs(
regex_string, variant, datatype, entity_definitions, formats, modality_datatypes
)

assert _regex_string == "/sub-(?P<subject>[0-9a-zA-Z]+)/sub-(?P=subject)_sessions\\.(tsv|json)"


def test__add_suffixes():
from bidsschematools.validator import _add_suffixes

# Test single expansion
regex_entities = "sub-(?P=subject)"
variant = {
"suffixes": ["sessions"],
"extensions": [
".tsv",
".json",
],
"entities": {"subject": "required"},
assert validator._entity_rule(rule, schema_obj) == {
"regex": (
r"sub-(?P<subject>[0-9a-zA-Z]+)/"
r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
r"(?P<datatype>anat)/"
r"sub-(?P=subject)_"
r"(?:ses-(?P=session)_)?"
r"(?P<suffix>T1w)"
r"(?P<extension>\.nii)"
),
"mandatory": False,
}
regex_string = "sub-(?P=subject)_sessions"

_regex_string = _add_suffixes(regex_entities, variant)

assert _regex_string == regex_string

# Test multiple expansions
regex_entities = (
"sub-(?P=subject)(|_ses-(?P=session))"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
"(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
"(|_dir-(?P<direction>[0-9a-zA-Z]+))(|_run-(?P<run>[0-9a-zA-Z]+))"
"(|_recording-(?P<recording>[0-9a-zA-Z]+))"
# Sidecar entities are optional
rule = Namespace.build(
{
"datatypes": ["anat", ""],
"entities": {"subject": "optional", "session": "optional"},
"suffixes": ["T1w"],
"extensions": [".json"],
}
)
variant = {
"suffixes": [
"physio",
"stim",
],
"extensions": [
".tsv.gz",
".json",
],
"entities": {
"subject": "required",
"session": "optional",
"acquisition": "optional",
"reconstruction": "optional",
"direction": "optional",
"run": "optional",
"recording": "optional",
},
assert validator._entity_rule(rule, schema_obj) == {
"regex": (
r"(?:sub-(?P<subject>[0-9a-zA-Z]+)/)?"
r"(?:ses-(?P<session>[0-9a-zA-Z]+)/)?"
r"(?:(?P<datatype>anat)/)?"
r"(?:sub-(?P=subject)_)?"
r"(?:ses-(?P=session)_)?"
r"(?P<suffix>T1w)"
r"(?P<extension>\.json)"
),
"mandatory": False,
}
regex_string = (
"sub-(?P=subject)(|_ses-(?P=session))"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
"(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
"(|_dir-(?P<direction>[0-9a-zA-Z]+))(|_run-(?P<run>[0-9a-zA-Z]+))"
"(|_recording-(?P<recording>[0-9a-zA-Z]+))"
"_(physio|stim)"
)

_regex_string = _add_suffixes(regex_entities, variant)

assert _regex_string == regex_string


@pytest.mark.parametrize("extension", ["bvec", "json", "tsv"])
def test__inheritance_expansion(extension):
from bidsschematools.validator import _inheritance_expansion
def test_split_inheritance_rules():
rule = {
"datatypes": ["anat"],
"entities": {"subject": "required", "session": "optional"},
"suffixes": ["T1w"],
"extensions": [".nii", ".json"],
}

# test .json
base_entry = (
r".*?/sub-(?P<subject>[0-9a-zA-Z]+)/"
r"(|ses-(?P<session>[0-9a-zA-Z]+)/)func/sub-(?P=subject)"
r"(|_ses-(?P=session))_task-(?P<task>[0-9a-zA-Z]+)"
r"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
r"(|_ce-(?P<ceagent>[0-9a-zA-Z]+))"
r"(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
r"(|_dir-(?P<direction>[0-9a-zA-Z]+))"
r"(|_run-(?P<run>[0-9]*[1-9]+[0-9]*))"
r"(|_echo-(?P<echo>[0-9]*[1-9]+[0-9]*))"
r"_phase(\.nii\.gz|\.nii|\.{})$".format(extension)
)
expected_entries = [
".*?/sub-(?P<subject>[0-9a-zA-Z]+)/"
"(|ses-(?P<session>[0-9a-zA-Z]+)/)sub-(?P=subject)"
"(|_ses-(?P=session))_task-(?P<task>[0-9a-zA-Z]+)"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
"(|_ce-(?P<ceagent>[0-9a-zA-Z]+))"
"(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
"(|_dir-(?P<direction>[0-9a-zA-Z]+))"
"(|_run-(?P<run>[0-9]*[1-9]+[0-9]*))"
"(|_echo-(?P<echo>[0-9]*[1-9]+[0-9]*))"
"_phase(\\.nii\\.gz|\\.nii|\\.{})$".format(extension),
".*?/task-(?P<task>[0-9a-zA-Z]+)"
"(|_acq-(?P<acquisition>[0-9a-zA-Z]+))"
"(|_ce-(?P<ceagent>[0-9a-zA-Z]+))"
"(|_rec-(?P<reconstruction>[0-9a-zA-Z]+))"
"(|_dir-(?P<direction>[0-9a-zA-Z]+))"
"(|_run-(?P<run>[0-9]*[1-9]+[0-9]*))"
"(|_echo-(?P<echo>[0-9]*[1-9]+[0-9]*))"
"_phase(\\.nii\\.gz|\\.nii|\\.{})$".format(extension),
]
main, sidecar = validator.split_inheritance_rules(rule)
assert main == {
"datatypes": ["anat"],
"entities": {"subject": "required", "session": "optional"},
"suffixes": ["T1w"],
"extensions": [".nii"],
}
assert sidecar == {
"datatypes": ["", "anat"],
"entities": {"subject": "optional", "session": "optional"},
"suffixes": ["T1w"],
"extensions": [".json"],
}

inheritance_expanded_entries = _inheritance_expansion(base_entry, datatype="func")
assert inheritance_expanded_entries == expected_entries
# Can't split again
(main2,) = validator.split_inheritance_rules(main)
assert main2 == {
"datatypes": ["anat"],
"entities": {"subject": "required", "session": "optional"},
"suffixes": ["T1w"],
"extensions": [".nii"],
}


def test_inheritance_examples():
Expand Down
Loading

0 comments on commit 098c3f2

Please sign in to comment.