Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(py): Base Python validator on schema instead of hard-coded regexes #1967

Merged
merged 7 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
command: |
conda activate testenv
cd bids-validator
pip install .
pytest --doctest-modules bids_validator
flake8 bids_validator
pydocstyle bids_validator/bids_validator.py
Expand Down
258 changes: 175 additions & 83 deletions bids-validator/bids_validator/bids_validator.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,69 @@
"""Validation class for BIDS projects."""
import re
import logging
import os
import json
import re
from functools import lru_cache
from itertools import chain

import bidsschematools as bst
import bidsschematools.rules
import bidsschematools.schema
import bidsschematools.utils
import bidsschematools.validator


class LoggingContext:
    # From logging cookbook (CC0):
    # https://docs.python.org/3/howto/logging-cookbook.html#using-a-context-manager-for-selective-logging
    #
    # Changes:
    # - Added docstrings (2024.05.06)
    """Temporarily change a logger's level and/or attach a handler.

    On entry the requested level is applied and the handler is attached;
    on exit the previous level is restored and the handler is detached
    (and, optionally, closed).

    Parameters
    ----------
    logger : logging.Logger
        Logger whose configuration is modified for the duration of the
        ``with`` block.
    level : int
        Level to apply while the context is active. If None, the
        logger's level is left alone.
    handler : logging.Handler
        Handler to attach while the context is active. If None, no
        handler is attached.
    close : bool
        Whether the handler is closed once it has been detached.
        Defaults to True.
    """

    def __init__(self, logger, level=None, handler=None, close=True):
        self.logger, self.level = logger, level
        self.handler, self.close = handler, close

    def __enter__(self):
        target = self.logger
        if self.level is not None:
            # Remember the current level so __exit__ can put it back.
            self.old_level = target.level
            target.setLevel(self.level)
        if self.handler:
            target.addHandler(self.handler)

    def __exit__(self, et, ev, tb):
        target, handler = self.logger, self.handler
        if self.level is not None:
            target.setLevel(self.old_level)
        if not handler:
            return
        target.removeHandler(handler)
        if self.close:
            handler.close()


class BIDSValidator:
"""Object for BIDS (Brain Imaging Data Structure) verification.

The main method of this class is `is_bids()`. You should use it for
checking whether a file path is compatible with BIDS.

"""

regexes = None

def __init__(self, index_associated=True):
"""Initialize BIDSValidator object.

Expand All @@ -25,10 +76,82 @@ def __init__(self, index_associated=True):
won't. Defaults to True.

"""
self.dir_rules = os.path.join(os.path.dirname(__file__)) + "/rules/"
self.index_associated = index_associated

def is_bids(self, path):
@classmethod
def _init_regexes(cls):
    """Fill ``cls.regexes`` from the BIDS schema, once.

    Does nothing when ``cls.regexes`` is already set. Otherwise the
    schema is loaded and the ``common`` and ``raw`` file rule groups are
    converted to filename regexes, which are stored on the class.
    """
    if cls.regexes is not None:
        return

    # Hold the schema-tools logger at WARNING while the schema loads.
    with LoggingContext(bst.utils.get_logger(), level=logging.WARNING):
        schema = bst.schema.load_schema()

    rule_groups = (schema.rules.files.common, schema.rules.files.raw)
    patterns = []
    for group in rule_groups:
        for rule in bst.rules.regexify_filename_rules(group, schema, level=2):
            patterns.append(rule['regex'])
    cls.regexes = patterns

@classmethod
def parse(cls, path):
    """Split a dataset-relative file path into its BIDS entities.

    Parameters
    ----------
    path : str
        Path of the file to parse. It must be relative to the root of a
        BIDS dataset and must begin with a forward slash `/`.

    Returns
    -------
    dict
        Mapping of entity names to entity values, taken from the named
        groups of the first matching rule. Groups that did not
        participate in the match are left out. An empty dictionary is
        returned when no rule matches, i.e. the path is not compatible
        with BIDS.

    Raises
    ------
    ValueError
        If `path` does not start with a forward slash.

    Notes
    -----
    When you test a file path, make sure that the path is relative to the
    root of the BIDS dataset the file is part of. That is, as soon as the
    file path contains parts outside of the BIDS dataset, the validation
    will fail. For example "home/username/my_dataset/participants.tsv" will
    fail, although "/participants.tsv" is a valid BIDS file.

    Examples
    --------
    >>> from bids_validator import BIDSValidator
    >>> validator = BIDSValidator()
    >>> validator.parse("/sub-01/anat/sub-01_rec-CSD_T1w.nii.gz")
    {'subject': '01', 'datatype': 'anat', 'reconstruction': 'CSD', 'suffix': 'T1w',
    'extension': '.nii.gz'}
    >>> validator.parse("/sub-01/anat/sub-01_acq-23_rec-CSD_T1w.exe")
    {}
    >>> validator.parse("home/username/my_dataset/participants.tsv")
    Traceback (most recent call last):
        ...
    ValueError: Path must be relative to root of a BIDS dataset, ...
    >>> validator.parse("/participants.tsv")
    {'stem': 'participants', 'extension': '.tsv'}

    """
    if cls.regexes is None:
        cls._init_regexes()

    # A path that starts with the platform separator is rewritten to use
    # forward slashes throughout.
    normalized = path.replace(os.sep, '/') if path.startswith(os.sep) else path

    if not normalized.startswith('/'):
        raise ValueError("Path must be relative to root of a BIDS dataset,"
                         " and must include a leading forward slash `/`.")

    # The rules are matched against the path without its leading slash.
    relative = normalized[1:]
    for pattern in cls.regexes:
        found = re.match(pattern, relative)
        if found is None:
            continue
        return {
            name: value
            for name, value in found.groupdict().items()
            if value is not None
        }

    return {}

@classmethod
@lru_cache
def is_bids(cls, path):
"""Check if file path adheres to BIDS.

Main method of the validator. Uses other class methods for checking
Expand All @@ -52,10 +175,12 @@ def is_bids(self, path):
--------
>>> from bids_validator import BIDSValidator
>>> validator = BIDSValidator()
>>> filepaths = ["/sub-01/anat/sub-01_rec-CSD_T1w.nii.gz",
... "/sub-01/anat/sub-01_acq-23_rec-CSD_T1w.exe", # wrong extension
... "home/username/my_dataset/participants.tsv", # not relative to root
... "/participants.tsv"]
>>> filepaths = [
... "/sub-01/anat/sub-01_rec-CSD_T1w.nii.gz",
... "/sub-01/anat/sub-01_acq-23_rec-CSD_T1w.exe", # wrong extension
... "home/username/my_dataset/participants.tsv", # not relative to root
... "/participants.tsv",
... ]
>>> for filepath in filepaths:
... print(validator.is_bids(filepath))
True
Expand All @@ -64,90 +189,57 @@ def is_bids(self, path):
True

"""
return any(
check(path) for check in (
self.is_top_level,
self.is_associated_data,
self.is_session_level,
self.is_subject_level,
self.is_phenotypic,
self.is_file
)
)
try:
return cls.parse(path) != {}
except ValueError:
return False

def is_top_level(self, path):
@classmethod
def is_top_level(cls, path):
"""Check if the file has appropriate name for a top-level file."""
regexps = self.get_regular_expressions(self.dir_rules +
'top_level_rules.json')

return any(re.search(regexp, path) for regexp in regexps)
parts = cls.parse(path)
if not parts:
return False
return parts.get('subject') is None

def is_associated_data(self, path):
"""Check if file is appropriate associated data."""
if not self.index_associated:
return False

regexps = self.get_regular_expressions(self.dir_rules +
'associated_data_rules.json')

return any(re.search(regexp, path) for regexp in regexps)
parts = self.parse(path)
if not parts:
return False
return parts.get('path') in ('code', 'derivatives', 'stimuli', 'sourcedata')

def is_session_level(self, path):
@classmethod
def is_session_level(cls, path):
"""Check if the file has appropriate name for a session level."""
regexps = self.get_regular_expressions(self.dir_rules +
'session_level_rules.json')

return any(self.conditional_match(regexp, path) for regexp in regexps)
parts = cls.parse(path)
if not parts:
return False
return parts.get('datatype') is None and parts.get('suffix') != 'sessions'

def is_subject_level(self, path):
@classmethod
def is_subject_level(cls, path):
"""Check if the file has appropriate name for a subject level."""
regexps = self.get_regular_expressions(self.dir_rules +
'subject_level_rules.json')

return any(re.search(regexp, path) for regexp in regexps)

def is_phenotypic(self, path):
    """Check if file is phenotypic data.

    Returns True when `path` is matched by at least one regular
    expression read from ``phenotypic_rules.json`` in ``self.dir_rules``.
    """
    rules_file = self.dir_rules + 'phenotypic_rules.json'
    for pattern in self.get_regular_expressions(rules_file):
        if re.search(pattern, path):
            return True
    return False
parts = cls.parse(path)
if not parts:
return False
return parts.get('suffix') == 'sessions'

def is_file(self, path):
@classmethod
def is_phenotypic(cls, path):
"""Check if file is phenotypic data."""
regexps = self.get_regular_expressions(self.dir_rules +
'file_level_rules.json')

return any(re.search(regexp, path) for regexp in regexps)

@staticmethod
@lru_cache
def get_regular_expressions(file_name):
    """Read regular expressions from a JSON rules file.

    Parameters
    ----------
    file_name : str
        Path of a JSON file whose top-level object maps rule names to
        rule objects. Each rule object has a ``"regexp"`` string and may
        have a ``"tokens"`` object mapping placeholder strings to lists
        of alternatives.

    Returns
    -------
    list of str
        One regular expression per rule, in file order. Every occurrence
        of a placeholder in a rule's ``"regexp"`` is replaced by that
        placeholder's alternatives joined with ``|``.

    Notes
    -----
    Results are memoized per `file_name` by `lru_cache`, so repeated
    calls return the same list object; callers should not mutate it.
    """
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(file_name, encoding="utf-8") as fin:
        rules = json.load(fin)

    regexps = []
    for rule in rules.values():
        regexp = rule["regexp"]
        if "tokens" in rule:
            for token, alternatives in rule["tokens"].items():
                regexp = regexp.replace(token, "|".join(alternatives))
        regexps.append(regexp)

    return regexps
parts = cls.parse(path)
if not parts:
return False
return parts.get('datatype') == 'phenotype'

@staticmethod
def conditional_match(expression, path):
    """Find conditional match.

    Takes the first result of ``findall`` for `expression` on `path` and
    accepts it when its second group is empty or equals its third group
    with the first character removed. Returns False when there is no
    result.
    """
    found = re.findall(expression, path)
    if not found:
        return False

    first = found[0]
    if not first:
        return False

    # adapted from JS code and JS does not support conditional groups
    return first[1] == first[2][1:] or not first[1]
@classmethod
def is_file(cls, path):
    """Check if file is a data file or non-inherited metadata file.

    True when `path` parses to a non-empty set of entities whose
    ``datatype`` is present and is not ``'phenotype'``.
    """
    entities = cls.parse(path)
    datatype = entities.get('datatype') if entities else None
    return datatype is not None and datatype != 'phenotype'
1 change: 1 addition & 0 deletions bids-validator/bids_validator/test_bids_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
actual file contents.

"""

import os

import pytest
Expand Down
3 changes: 3 additions & 0 deletions bids-validator/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ classifiers = [
"Topic :: Scientific/Engineering",
]
requires-python = ">=3.8"
dependencies = [
"bidsschematools @ git+https://github.com/bids-standard/bids-specification.git#subdirectory=tools/schemacode",
]

[project.urls]
Homepage = "https://github.com/bids-standard/bids-validator"
Expand Down
7 changes: 5 additions & 2 deletions bids-validator/setup.cfg
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
[tool:pytest]
ignore =
_version.py
doctest_optionflags = ALLOW_UNICODE NORMALIZE_WHITESPACE ELLIPSIS

[flake8]
exclude =
_version.py
max-line-length = 88

[pydocstyle]
add-ignore = D105,D107
Loading