From 6f8c9656c27f64ee1b60e9d32a04cd83c95136a3 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 22 Mar 2024 16:52:48 -0400 Subject: [PATCH] ENH: Use BIDS URIs to track Sources in sidecars (#3255) Closes #3252. This is just a first pass- it only modifies the RawSources for now. I can add the immediate Sources in another PR (this'll require a lot more effort). ## Changes proposed in this pull request - Add `DatasetLinks` field to the `dataset_description.json` - The input dataset is called `raw` - Any other datasets supplied through `--derivatives` are automatically named `deriv-`. We might want to support named derivatives at some point. - Replace BIDS-relative paths in `RawSources` fields with BIDS-URIs. - Change `RawSources` to `Sources` in the sidecar files, since `RawSources` is deprecated. ## Documentation that should be reviewed I'll probably need to update the documentation, but haven't yet. --------- Co-authored-by: Mathias Goncalves --- fmriprep/cli/run.py | 6 +- fmriprep/config.py | 12 ++++ fmriprep/interfaces/bids.py | 62 +++++++++++++++++++ fmriprep/interfaces/tests/test_bids.py | 72 ++++++++++++++++++++++ fmriprep/utils/bids.py | 71 ++++++++++++++++++++- fmriprep/workflows/bold/base.py | 2 + fmriprep/workflows/bold/fit.py | 30 ++++----- fmriprep/workflows/bold/outputs.py | 85 +++++++++++++++++++------- 8 files changed, 303 insertions(+), 37 deletions(-) create mode 100644 fmriprep/interfaces/bids.py create mode 100644 fmriprep/interfaces/tests/test_bids.py diff --git a/fmriprep/cli/run.py b/fmriprep/cli/run.py index 6bbd9c793..a3f99b61b 100644 --- a/fmriprep/cli/run.py +++ b/fmriprep/cli/run.py @@ -222,7 +222,11 @@ def main(): config.execution.run_uuid, session_list=session_list, ) - write_derivative_description(config.execution.bids_dir, config.execution.fmriprep_dir) + write_derivative_description( + config.execution.bids_dir, + config.execution.fmriprep_dir, + dataset_links=config.execution.dataset_links, + ) write_bidsignore(config.execution.fmriprep_dir) if failed_reports: diff --git a/fmriprep/config.py b/fmriprep/config.py index d0d4668e8..e01acec98 100644 --- a/fmriprep/config.py +++ b/fmriprep/config.py @@ -226,6 +226,8 @@ def load(cls, settings, init=True, ignore=None): if k in cls._paths: if isinstance(v, list | tuple): setattr(cls, k, [Path(val).absolute() for val in v]) + elif isinstance(v, dict): + setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()}) else: setattr(cls, k, Path(v).absolute()) elif hasattr(cls, k): @@ -251,6 +253,8 @@ def get(cls): if k in cls._paths: if isinstance(v, list | tuple): v = [str(val) for val in v] + elif isinstance(v, dict): + v = {key: str(val) for key, val in v.items()} else: v = str(v) if isinstance(v, SpatialReferences): @@ -439,6 +443,8 @@ class execution(_Config): """Path to a working directory where intermediate results will be available.""" write_graph = False """Write out the computational graph corresponding to the planned preprocessing.""" + dataset_links = {} + """A dictionary of dataset links to be used to track Sources in sidecars.""" _layout = None @@ -454,6 +460,7 @@ class execution(_Config): 'output_dir', 'templateflow_home', 'work_dir', + 'dataset_links', ) @classmethod @@ -518,6 +525,11 @@ def _process_value(value): for k, v in filters.items(): cls.bids_filters[acq][k] = _process_value(v) + dataset_links = {'raw': cls.bids_dir} + for i_deriv, deriv_path in enumerate(cls.derivatives): + dataset_links[f'deriv-{i_deriv}'] = deriv_path + cls.dataset_links = dataset_links + if 'all' in cls.debug: cls.debug = list(DEBUG_MODES) diff --git a/fmriprep/interfaces/bids.py b/fmriprep/interfaces/bids.py new file mode 100644 index 000000000..87ec5d630 --- /dev/null +++ b/fmriprep/interfaces/bids.py @@ -0,0 +1,62 @@ +"""BIDS-related interfaces.""" + +from pathlib import Path + +from bids.utils import listify +from nipype.interfaces.base import ( + DynamicTraitedSpec, + SimpleInterface, + TraitedSpec, + isdefined, + traits, +) +from nipype.interfaces.io import add_traits +from nipype.interfaces.utility.base import _ravel + +from ..utils.bids import _find_nearest_path + + +class _BIDSURIInputSpec(DynamicTraitedSpec): + dataset_links = traits.Dict(mandatory=True, desc='Dataset links') + out_dir = traits.Str(mandatory=True, desc='Output directory') + + +class _BIDSURIOutputSpec(TraitedSpec): + out = traits.List( + traits.Str, + desc='BIDS URI(s) for file', + ) + + +class BIDSURI(SimpleInterface): + """Convert input filenames to BIDS URIs, based on links in the dataset. + + This interface can combine multiple lists of inputs. + """ + + input_spec = _BIDSURIInputSpec + output_spec = _BIDSURIOutputSpec + + def __init__(self, numinputs=0, **inputs): + super().__init__(**inputs) + self._numinputs = numinputs + if numinputs >= 1: + input_names = [f'in{i + 1}' for i in range(numinputs)] + else: + input_names = [] + add_traits(self.inputs, input_names) + + def _run_interface(self, runtime): + inputs = [getattr(self.inputs, f'in{i + 1}') for i in range(self._numinputs)] + in_files = listify(inputs) + in_files = _ravel(in_files) + # Remove undefined inputs + in_files = [f for f in in_files if isdefined(f)] + # Convert the dataset links to BIDS URI prefixes + updated_keys = {f'bids:{k}:': Path(v) for k, v in self.inputs.dataset_links.items()} + updated_keys['bids::'] = Path(self.inputs.out_dir) + # Convert the paths to BIDS URIs + out = [_find_nearest_path(updated_keys, f) for f in in_files] + self._results['out'] = out + + return runtime diff --git a/fmriprep/interfaces/tests/test_bids.py b/fmriprep/interfaces/tests/test_bids.py new file mode 100644 index 000000000..b30da3d10 --- /dev/null +++ b/fmriprep/interfaces/tests/test_bids.py @@ -0,0 +1,72 @@ +"""Tests for fmriprep.interfaces.bids.""" + + +def test_BIDSURI(): + """Test the BIDSURI interface.""" + from fmriprep.interfaces.bids import BIDSURI + + dataset_links = { + 'raw': '/data', + 'deriv-0': '/data/derivatives/source-1', + } + out_dir = '/data/derivatives/fmriprep' + + # A single element as a string + interface = BIDSURI( + numinputs=1, + dataset_links=dataset_links, + out_dir=out_dir, + ) + interface.inputs.in1 = '/data/sub-01/func/sub-01_task-rest_bold.nii.gz' + results = interface.run() + assert results.outputs.out == ['bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'] + + # A single element as a list + interface = BIDSURI( + numinputs=1, + dataset_links=dataset_links, + out_dir=out_dir, + ) + interface.inputs.in1 = ['/data/sub-01/func/sub-01_task-rest_bold.nii.gz'] + results = interface.run() + assert results.outputs.out == ['bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'] + + # Two inputs: a string and a list + interface = BIDSURI( + numinputs=2, + dataset_links=dataset_links, + out_dir=out_dir, + ) + interface.inputs.in1 = '/data/sub-01/func/sub-01_task-rest_bold.nii.gz' + interface.inputs.in2 = [ + '/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz', + '/out/sub-01/func/sub-01_task-rest_bold.nii.gz', + ] + results = interface.run() + assert results.outputs.out == [ + 'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz', + 'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz', + '/out/sub-01/func/sub-01_task-rest_bold.nii.gz', # No change + ] + + # Two inputs as lists + interface = BIDSURI( + numinputs=2, + dataset_links=dataset_links, + out_dir=out_dir, + ) + interface.inputs.in1 = [ + '/data/sub-01/func/sub-01_task-rest_bold.nii.gz', + 'bids:raw:sub-01/func/sub-01_task-rest_boldref.nii.gz', + ] + interface.inputs.in2 = [ + '/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz', + '/out/sub-01/func/sub-01_task-rest_bold.nii.gz', + ] + results = interface.run() + assert results.outputs.out == [ + 'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz', + 'bids:raw:sub-01/func/sub-01_task-rest_boldref.nii.gz', # No change + 'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz', + '/out/sub-01/func/sub-01_task-rest_bold.nii.gz', # No change + ] diff --git a/fmriprep/utils/bids.py b/fmriprep/utils/bids.py index b139c72b8..10461dd68 100644 --- a/fmriprep/utils/bids.py +++ b/fmriprep/utils/bids.py @@ -97,7 +97,7 @@ def write_bidsignore(deriv_dir): ignore_file.write_text('\n'.join(bids_ignore) + '\n') -def write_derivative_description(bids_dir, deriv_dir): +def write_derivative_description(bids_dir, deriv_dir, dataset_links=None): from .. import __version__ DOWNLOAD_URL = f'https://github.com/nipreps/fmriprep/archive/{__version__}.tar.gz' @@ -145,6 +145,10 @@ def write_derivative_description(bids_dir, deriv_dir): if 'License' in orig_desc: desc['License'] = orig_desc['License'] + # Add DatasetLinks + if dataset_links: + desc['DatasetLinks'] = {k: str(v) for k, v in dataset_links.items()} + Path.write_text(deriv_dir / 'dataset_description.json', json.dumps(desc, indent=4)) @@ -343,3 +347,68 @@ def dismiss_echo(entities=None): entities.append('echo') return entities + + +def _find_nearest_path(path_dict, input_path): + """Find the nearest relative path from an input path to a dictionary of paths. + + If ``input_path`` is not relative to any of the paths in ``path_dict``, + the absolute path string is returned. + + If ``input_path`` is already a BIDS-URI, then it will be returned unmodified. + + Parameters + ---------- + path_dict : dict of (str, Path) + A dictionary of paths. + input_path : Path + The input path to match. + + Returns + ------- + matching_path : str + The nearest relative path from the input path to a path in the dictionary. + This is either the concatenation of the associated key from ``path_dict`` + and the relative path from the associated value from ``path_dict`` to ``input_path``, + or the absolute path to ``input_path`` if no matching path is found from ``path_dict``. + + Examples + -------- + >>> from pathlib import Path + >>> path_dict = { + ... 'bids::': Path('/data/derivatives/fmriprep'), + ... 'bids:raw:': Path('/data'), + ... 'bids:deriv-0:': Path('/data/derivatives/source-1'), + ... } + >>> input_path = Path('/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz') + >>> _find_nearest_path(path_dict, input_path) # match to 'bids:deriv-0:' + 'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz' + >>> input_path = Path('/out/sub-01/func/sub-01_task-rest_bold.nii.gz') + >>> _find_nearest_path(path_dict, input_path) # no match- absolute path + '/out/sub-01/func/sub-01_task-rest_bold.nii.gz' + >>> input_path = Path('/data/sub-01/func/sub-01_task-rest_bold.nii.gz') + >>> _find_nearest_path(path_dict, input_path) # match to 'bids:raw:' + 'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz' + >>> input_path = 'bids::sub-01/func/sub-01_task-rest_bold.nii.gz' + >>> _find_nearest_path(path_dict, input_path) # already a BIDS-URI + 'bids::sub-01/func/sub-01_task-rest_bold.nii.gz' + """ + # Don't modify BIDS-URIs + if isinstance(input_path, str) and input_path.startswith('bids:'): + return input_path + + input_path = Path(input_path) + matching_path = None + for key, path in path_dict.items(): + if input_path.is_relative_to(path): + relative_path = input_path.relative_to(path) + if (matching_path is None) or (len(relative_path.parts) < len(matching_path.parts)): + matching_key = key + matching_path = relative_path + + if matching_path is None: + matching_path = str(input_path.absolute()) + else: + matching_path = f'{matching_key}{matching_path}' + + return matching_path diff --git a/fmriprep/workflows/bold/base.py b/fmriprep/workflows/bold/base.py index b5e97e8bb..105ce6fdb 100644 --- a/fmriprep/workflows/bold/base.py +++ b/fmriprep/workflows/bold/base.py @@ -328,6 +328,8 @@ def init_bold_wf( workflow.connect([ (bold_fit_wf, ds_bold_native_wf, [ ('outputnode.bold_mask', 'inputnode.bold_mask'), + ('outputnode.motion_xfm', 'inputnode.motion_xfm'), + ('outputnode.boldref2fmap_xfm', 'inputnode.boldref2fmap_xfm'), ]), (bold_native_wf, ds_bold_native_wf, [ ('outputnode.bold_native', 'inputnode.bold'), diff --git a/fmriprep/workflows/bold/fit.py b/fmriprep/workflows/bold/fit.py index c5013c9db..25fca9134 100644 --- a/fmriprep/workflows/bold/fit.py +++ b/fmriprep/workflows/bold/fit.py @@ -378,6 +378,10 @@ def init_bold_fit_wf( # fmt:on # Stage 1: Generate motion correction boldref + hmc_boldref_source_buffer = pe.Node( + niu.IdentityInterface(fields=['in_file']), + name='hmc_boldref_source_buffer', + ) if not have_hmcref: config.loggers.workflow.info('Stage 1: Adding HMC boldref workflow') hmc_boldref_wf = init_raw_boldref_wf( @@ -395,7 +399,6 @@ def init_bold_fit_wf( ) ds_hmc_boldref_wf.inputs.inputnode.source_files = [bold_file] - # fmt:off workflow.connect([ (hmc_boldref_wf, hmcref_buffer, [ ('outputnode.bold_file', 'bold_file'), @@ -407,8 +410,10 @@ def init_bold_fit_wf( (hmc_boldref_wf, func_fit_reports_wf, [ ('outputnode.validation_report', 'inputnode.validation_report'), ]), - ]) - # fmt:on + (ds_hmc_boldref_wf, hmc_boldref_source_buffer, [ + ('outputnode.boldref', 'in_file'), + ]), + ]) # fmt:skip else: config.loggers.workflow.info('Found HMC boldref - skipping Stage 1') @@ -417,12 +422,11 @@ def init_bold_fit_wf( hmcref_buffer.inputs.boldref = precomputed['hmc_boldref'] - # fmt:off workflow.connect([ (validate_bold, hmcref_buffer, [('out_file', 'bold_file')]), (validate_bold, func_fit_reports_wf, [('out_report', 'inputnode.validation_report')]), - ]) - # fmt:on + (hmcref_buffer, hmc_boldref_source_buffer, [('boldref', 'in_file')]), + ]) # fmt:skip # Stage 2: Estimate head motion if not hmc_xforms: @@ -437,7 +441,6 @@ def init_bold_fit_wf( ) ds_hmc_wf.inputs.inputnode.source_files = [bold_file] - # fmt:off workflow.connect([ (hmcref_buffer, bold_hmc_wf, [ ('boldref', 'inputnode.raw_ref_image'), @@ -445,12 +448,11 @@ def init_bold_fit_wf( ]), (bold_hmc_wf, ds_hmc_wf, [('outputnode.xforms', 'inputnode.xforms')]), (bold_hmc_wf, hmc_buffer, [ - ('outputnode.xforms', 'hmc_xforms'), ('outputnode.movpar_file', 'movpar_file'), ('outputnode.rmsd_file', 'rmsd_file'), ]), - ]) - # fmt:on + (ds_hmc_wf, hmc_buffer, [('outputnode.xforms', 'hmc_xforms')]), + ]) # fmt:skip else: config.loggers.workflow.info('Found motion correction transforms - skipping Stage 2') hmc_buffer.inputs.hmc_xforms = hmc_xforms @@ -471,15 +473,15 @@ def init_bold_fit_wf( name='ds_coreg_boldref_wf', ) - # fmt:off workflow.connect([ (hmcref_buffer, fmapref_buffer, [('boldref', 'boldref_files')]), (fmapref_buffer, enhance_boldref_wf, [('out', 'inputnode.in_file')]), - (fmapref_buffer, ds_coreg_boldref_wf, [('out', 'inputnode.source_files')]), + (hmc_boldref_source_buffer, ds_coreg_boldref_wf, [ + ('in_file', 'inputnode.source_files'), + ]), (ds_coreg_boldref_wf, regref_buffer, [('outputnode.boldref', 'boldref')]), (fmapref_buffer, func_fit_reports_wf, [('out', 'inputnode.sdc_boldref')]), - ]) - # fmt:on + ]) # fmt:skip if fieldmap_id: fmap_select = pe.Node( diff --git a/fmriprep/workflows/bold/outputs.py b/fmriprep/workflows/bold/outputs.py index fdc3beda3..89931ef0b 100644 --- a/fmriprep/workflows/bold/outputs.py +++ b/fmriprep/workflows/bold/outputs.py @@ -29,11 +29,11 @@ from nipype.pipeline import engine as pe from niworkflows.interfaces.fixes import FixHeaderApplyTransforms as ApplyTransforms from niworkflows.utils.images import dseg_label -from smriprep.workflows.outputs import _bids_relative from fmriprep import config from fmriprep.config import DEFAULT_MEMORY_MIN_GB from fmriprep.interfaces import DerivativesDataSink +from fmriprep.interfaces.bids import BIDSURI from fmriprep.utils.bids import dismiss_echo @@ -437,8 +437,14 @@ def init_ds_boldref_wf( ) outputnode = pe.Node(niu.IdentityInterface(fields=['boldref']), name='outputnode') - raw_sources = pe.Node(niu.Function(function=_bids_relative), name='raw_sources') - raw_sources.inputs.bids_root = bids_root + sources = pe.Node( + BIDSURI( + numinputs=1, + dataset_links=config.execution.dataset_links, + out_dir=str(config.execution.fmriprep_dir.absolute()), + ), + name='sources', + ) ds_boldref = pe.Node( DerivativesDataSink( @@ -454,10 +460,10 @@ def init_ds_boldref_wf( # fmt:off workflow.connect([ - (inputnode, raw_sources, [('source_files', 'in_files')]), + (inputnode, sources, [('source_files', 'in1')]), (inputnode, ds_boldref, [('boldref', 'in_file'), ('source_files', 'source_file')]), - (raw_sources, ds_boldref, [('out', 'RawSources')]), + (sources, ds_boldref, [('out', 'Sources')]), (ds_boldref, outputnode, [('out_file', 'boldref')]), ]) # fmt:on @@ -481,8 +487,14 @@ def init_ds_registration_wf( ) outputnode = pe.Node(niu.IdentityInterface(fields=['xform']), name='outputnode') - raw_sources = pe.Node(niu.Function(function=_bids_relative), name='raw_sources') - raw_sources.inputs.bids_root = bids_root + sources = pe.Node( + BIDSURI( + numinputs=1, + dataset_links=config.execution.dataset_links, + out_dir=str(config.execution.fmriprep_dir.absolute()), + ), + name='sources', + ) ds_xform = pe.Node( DerivativesDataSink( @@ -500,10 +512,10 @@ def init_ds_registration_wf( # fmt:off workflow.connect([ - (inputnode, raw_sources, [('source_files', 'in_files')]), + (inputnode, sources, [('source_files', 'in1')]), (inputnode, ds_xform, [('xform', 'in_file'), ('source_files', 'source_file')]), - (raw_sources, ds_xform, [('out', 'RawSources')]), + (sources, ds_xform, [('out', 'Sources')]), (ds_xform, outputnode, [('out_file', 'xform')]), ]) # fmt:on @@ -525,8 +537,14 @@ def init_ds_hmc_wf( ) outputnode = pe.Node(niu.IdentityInterface(fields=['xforms']), name='outputnode') - raw_sources = pe.Node(niu.Function(function=_bids_relative), name='raw_sources') - raw_sources.inputs.bids_root = bids_root + sources = pe.Node( + BIDSURI( + numinputs=1, + dataset_links=config.execution.dataset_links, + out_dir=str(config.execution.fmriprep_dir.absolute()), + ), + name='sources', + ) ds_xforms = pe.Node( DerivativesDataSink( @@ -544,10 +562,10 @@ def init_ds_hmc_wf( # fmt:off workflow.connect([ - (inputnode, raw_sources, [('source_files', 'in_files')]), + (inputnode, sources, [('source_files', 'in1')]), (inputnode, ds_xforms, [('xforms', 'in_file'), ('source_files', 'source_file')]), - (raw_sources, ds_xforms, [('out', 'RawSources')]), + (sources, ds_xforms, [('out', 'Sources')]), (ds_xforms, outputnode, [('out_file', 'xforms')]), ]) # fmt:on @@ -577,14 +595,29 @@ def init_ds_bold_native_wf( 'bold_mask', 'bold_echos', 't2star', + # Transforms previously used to generate the outputs + 'motion_xfm', + 'boldref2fmap_xfm', ] ), name='inputnode', ) - raw_sources = pe.Node(niu.Function(function=_bids_relative), name='raw_sources') - raw_sources.inputs.bids_root = bids_root - workflow.connect(inputnode, 'source_files', raw_sources, 'in_files') + sources = pe.Node( + BIDSURI( + numinputs=3, + dataset_links=config.execution.dataset_links, + out_dir=str(config.execution.fmriprep_dir.absolute()), + ), + name='sources', + ) + workflow.connect([ + (inputnode, sources, [ + ('source_files', 'in1'), + ('motion_xfm', 'in2'), + ('boldref2fmap_xfm', 'in3'), + ]), + ]) # fmt:skip # Masks should be output if any other derivatives are output ds_bold_mask = pe.Node( @@ -604,7 +637,7 @@ def init_ds_bold_native_wf( ('source_files', 'source_file'), ('bold_mask', 'in_file'), ]), - (raw_sources, ds_bold_mask, [('out', 'RawSources')]), + (sources, ds_bold_mask, [('out', 'Sources')]), ]) # fmt:skip if bold_output: @@ -652,7 +685,7 @@ def init_ds_bold_native_wf( ('source_files', 'source_file'), ('t2star', 'in_file'), ]), - (raw_sources, ds_t2star, [('out', 'RawSources')]), + (sources, ds_t2star, [('out', 'Sources')]), ]) # fmt:skip if echo_output: @@ -714,8 +747,14 @@ def init_ds_volumes_wf( name='inputnode', ) - raw_sources = pe.Node(niu.Function(function=_bids_relative), name='raw_sources') - raw_sources.inputs.bids_root = bids_root + sources = pe.Node( + BIDSURI( + numinputs=3, + dataset_links=config.execution.dataset_links, + out_dir=str(config.execution.fmriprep_dir.absolute()), + ), + name='sources', + ) boldref2target = pe.Node(niu.Merge(2), name='boldref2target') # BOLD is pre-resampled @@ -733,7 +772,11 @@ def init_ds_volumes_wf( mem_gb=DEFAULT_MEMORY_MIN_GB, ) workflow.connect([ - (inputnode, raw_sources, [('source_files', 'in_files')]), + (inputnode, sources, [ + ('source_files', 'in1'), + ('boldref2anat_xfm', 'in2'), + ('anat2std_xfm', 'in3'), + ]), (inputnode, boldref2target, [ # Note that ANTs expects transforms in target-to-source order # Reverse this for nitransforms-based resamplers