ENH: Use BIDS URIs to track Sources in sidecars (#3255)

Closes #3252. This is just a first pass- it only modifies the RawSources for now. I can add the immediate Sources in another PR (this'll require a lot more effort). ## Changes proposed in this pull request - Add `DatasetLinks` field to the `dataset_description.json` - The input dataset is called `raw` - Any other datasets supplied through `--derivatives` are automatically named `deriv-<index>`. We might want to support named derivatives at some point. - Replace BIDS-relative paths in `RawSources` fields with BIDS-URIs. - Change `RawSources` to `Sources` in the sidecar files, since `RawSources` is deprecated. ## Documentation that should be reviewed I'll probably need to update the documentation, but haven't yet. --------- Co-authored-by: Mathias Goncalves <[email protected]>
nipreps · Mar 22, 2024 · 6f8c965 · 6f8c965
1 parent cdf7040
commit 6f8c965
Show file tree

Hide file tree

Showing 8 changed files with 303 additions and 37 deletions.
diff --git a/fmriprep/cli/run.py b/fmriprep/cli/run.py
@@ -222,7 +222,11 @@ def main():
             config.execution.run_uuid,
             session_list=session_list,
         )
-        write_derivative_description(config.execution.bids_dir, config.execution.fmriprep_dir)
+        write_derivative_description(
+            config.execution.bids_dir,
+            config.execution.fmriprep_dir,
+            dataset_links=config.execution.dataset_links,
+        )
         write_bidsignore(config.execution.fmriprep_dir)
 
         if failed_reports:

diff --git a/fmriprep/config.py b/fmriprep/config.py
@@ -226,6 +226,8 @@ def load(cls, settings, init=True, ignore=None):
             if k in cls._paths:
                 if isinstance(v, list | tuple):
                     setattr(cls, k, [Path(val).absolute() for val in v])
+                elif isinstance(v, dict):
+                    setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()})
                 else:
                     setattr(cls, k, Path(v).absolute())
             elif hasattr(cls, k):
@@ -251,6 +253,8 @@ def get(cls):
             if k in cls._paths:
                 if isinstance(v, list | tuple):
                     v = [str(val) for val in v]
+                elif isinstance(v, dict):
+                    v = {key: str(val) for key, val in v.items()}
                 else:
                     v = str(v)
             if isinstance(v, SpatialReferences):
@@ -439,6 +443,8 @@ class execution(_Config):
     """Path to a working directory where intermediate results will be available."""
     write_graph = False
     """Write out the computational graph corresponding to the planned preprocessing."""
+    dataset_links = {}
+    """A dictionary of dataset links to be used to track Sources in sidecars."""
 
     _layout = None
 
@@ -454,6 +460,7 @@ class execution(_Config):
         'output_dir',
         'templateflow_home',
         'work_dir',
+        'dataset_links',
     )
 
     @classmethod
@@ -518,6 +525,11 @@ def _process_value(value):
                 for k, v in filters.items():
                     cls.bids_filters[acq][k] = _process_value(v)
 
+        dataset_links = {'raw': cls.bids_dir}
+        for i_deriv, deriv_path in enumerate(cls.derivatives):
+            dataset_links[f'deriv-{i_deriv}'] = deriv_path
+        cls.dataset_links = dataset_links
+
         if 'all' in cls.debug:
             cls.debug = list(DEBUG_MODES)
 

diff --git a/fmriprep/interfaces/bids.py b/fmriprep/interfaces/bids.py
@@ -0,0 +1,62 @@
+"""BIDS-related interfaces."""
+
+from pathlib import Path
+
+from bids.utils import listify
+from nipype.interfaces.base import (
+    DynamicTraitedSpec,
+    SimpleInterface,
+    TraitedSpec,
+    isdefined,
+    traits,
+)
+from nipype.interfaces.io import add_traits
+from nipype.interfaces.utility.base import _ravel
+
+from ..utils.bids import _find_nearest_path
+
+
+class _BIDSURIInputSpec(DynamicTraitedSpec):
+    dataset_links = traits.Dict(mandatory=True, desc='Dataset links')
+    out_dir = traits.Str(mandatory=True, desc='Output directory')
+
+
+class _BIDSURIOutputSpec(TraitedSpec):
+    out = traits.List(
+        traits.Str,
+        desc='BIDS URI(s) for file',
+    )
+
+
+class BIDSURI(SimpleInterface):
+    """Convert input filenames to BIDS URIs, based on links in the dataset.
+
+    This interface can combine multiple lists of inputs.
+    """
+
+    input_spec = _BIDSURIInputSpec
+    output_spec = _BIDSURIOutputSpec
+
+    def __init__(self, numinputs=0, **inputs):
+        super().__init__(**inputs)
+        self._numinputs = numinputs
+        if numinputs >= 1:
+            input_names = [f'in{i + 1}' for i in range(numinputs)]
+        else:
+            input_names = []
+        add_traits(self.inputs, input_names)
+
+    def _run_interface(self, runtime):
+        inputs = [getattr(self.inputs, f'in{i + 1}') for i in range(self._numinputs)]
+        in_files = listify(inputs)
+        in_files = _ravel(in_files)
+        # Remove undefined inputs
+        in_files = [f for f in in_files if isdefined(f)]
+        # Convert the dataset links to BIDS URI prefixes
+        updated_keys = {f'bids:{k}:': Path(v) for k, v in self.inputs.dataset_links.items()}
+        updated_keys['bids::'] = Path(self.inputs.out_dir)
+        # Convert the paths to BIDS URIs
+        out = [_find_nearest_path(updated_keys, f) for f in in_files]
+        self._results['out'] = out
+
+        return runtime
diff --git a/fmriprep/interfaces/tests/test_bids.py b/fmriprep/interfaces/tests/test_bids.py
@@ -0,0 +1,72 @@
+"""Tests for fmriprep.interfaces.bids."""
+
+
+def test_BIDSURI():
+    """Test the BIDSURI interface."""
+    from fmriprep.interfaces.bids import BIDSURI
+
+    dataset_links = {
+        'raw': '/data',
+        'deriv-0': '/data/derivatives/source-1',
+    }
+    out_dir = '/data/derivatives/fmriprep'
+
+    # A single element as a string
+    interface = BIDSURI(
+        numinputs=1,
+        dataset_links=dataset_links,
+        out_dir=out_dir,
+    )
+    interface.inputs.in1 = '/data/sub-01/func/sub-01_task-rest_bold.nii.gz'
+    results = interface.run()
+    assert results.outputs.out == ['bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz']
+
+    # A single element as a list
+    interface = BIDSURI(
+        numinputs=1,
+        dataset_links=dataset_links,
+        out_dir=out_dir,
+    )
+    interface.inputs.in1 = ['/data/sub-01/func/sub-01_task-rest_bold.nii.gz']
+    results = interface.run()
+    assert results.outputs.out == ['bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz']
+
+    # Two inputs: a string and a list
+    interface = BIDSURI(
+        numinputs=2,
+        dataset_links=dataset_links,
+        out_dir=out_dir,
+    )
+    interface.inputs.in1 = '/data/sub-01/func/sub-01_task-rest_bold.nii.gz'
+    interface.inputs.in2 = [
+        '/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz',
+        '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',
+    ]
+    results = interface.run()
+    assert results.outputs.out == [
+        'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz',
+        'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz',
+        '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',  # No change
+    ]
+
+    # Two inputs as lists
+    interface = BIDSURI(
+        numinputs=2,
+        dataset_links=dataset_links,
+        out_dir=out_dir,
+    )
+    interface.inputs.in1 = [
+        '/data/sub-01/func/sub-01_task-rest_bold.nii.gz',
+        'bids:raw:sub-01/func/sub-01_task-rest_boldref.nii.gz',
+    ]
+    interface.inputs.in2 = [
+        '/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz',
+        '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',
+    ]
+    results = interface.run()
+    assert results.outputs.out == [
+        'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz',
+        'bids:raw:sub-01/func/sub-01_task-rest_boldref.nii.gz',  # No change
+        'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz',
+        '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',  # No change
+    ]
diff --git a/fmriprep/utils/bids.py b/fmriprep/utils/bids.py
@@ -97,7 +97,7 @@ def write_bidsignore(deriv_dir):
     ignore_file.write_text('\n'.join(bids_ignore) + '\n')
 
 
-def write_derivative_description(bids_dir, deriv_dir):
+def write_derivative_description(bids_dir, deriv_dir, dataset_links=None):
     from .. import __version__
 
     DOWNLOAD_URL = f'https://github.com/nipreps/fmriprep/archive/{__version__}.tar.gz'
@@ -145,6 +145,10 @@ def write_derivative_description(bids_dir, deriv_dir):
     if 'License' in orig_desc:
         desc['License'] = orig_desc['License']
 
+    # Add DatasetLinks
+    if dataset_links:
+        desc['DatasetLinks'] = {k: str(v) for k, v in dataset_links.items()}
+
     Path.write_text(deriv_dir / 'dataset_description.json', json.dumps(desc, indent=4))
 
 
@@ -343,3 +347,68 @@ def dismiss_echo(entities=None):
         entities.append('echo')
 
     return entities
+
+
+def _find_nearest_path(path_dict, input_path):
+    """Find the nearest relative path from an input path to a dictionary of paths.
+
+    If ``input_path`` is not relative to any of the paths in ``path_dict``,
+    the absolute path string is returned.
+
+    If ``input_path`` is already a BIDS-URI, then it will be returned unmodified.
+
+    Parameters
+    ----------
+    path_dict : dict of (str, Path)
+        A dictionary of paths.
+    input_path : Path
+        The input path to match.
+
+    Returns
+    -------
+    matching_path : str
+        The nearest relative path from the input path to a path in the dictionary.
+        This is either the concatenation of the associated key from ``path_dict``
+        and the relative path from the associated value from ``path_dict`` to ``input_path``,
+        or the absolute path to ``input_path`` if no matching path is found from ``path_dict``.
+
+    Examples
+    --------
+    >>> from pathlib import Path
+    >>> path_dict = {
+    ...     'bids::': Path('/data/derivatives/fmriprep'),
+    ...     'bids:raw:': Path('/data'),
+    ...     'bids:deriv-0:': Path('/data/derivatives/source-1'),
+    ... }
+    >>> input_path = Path('/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz')
+    >>> _find_nearest_path(path_dict, input_path)  # match to 'bids:deriv-0:'
+    'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz'
+    >>> input_path = Path('/out/sub-01/func/sub-01_task-rest_bold.nii.gz')
+    >>> _find_nearest_path(path_dict, input_path)  # no match- absolute path
+    '/out/sub-01/func/sub-01_task-rest_bold.nii.gz'
+    >>> input_path = Path('/data/sub-01/func/sub-01_task-rest_bold.nii.gz')
+    >>> _find_nearest_path(path_dict, input_path)  # match to 'bids:raw:'
+    'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'
+    >>> input_path = 'bids::sub-01/func/sub-01_task-rest_bold.nii.gz'
+    >>> _find_nearest_path(path_dict, input_path)  # already a BIDS-URI
+    'bids::sub-01/func/sub-01_task-rest_bold.nii.gz'
+    """
+    # Don't modify BIDS-URIs
+    if isinstance(input_path, str) and input_path.startswith('bids:'):
+        return input_path
+
+    input_path = Path(input_path)
+    matching_path = None
+    for key, path in path_dict.items():
+        if input_path.is_relative_to(path):
+            relative_path = input_path.relative_to(path)
+            if (matching_path is None) or (len(relative_path.parts) < len(matching_path.parts)):
+                matching_key = key
+                matching_path = relative_path
+
+    if matching_path is None:
+        matching_path = str(input_path.absolute())
+    else:
+        matching_path = f'{matching_key}{matching_path}'
+
+    return matching_path
diff --git a/fmriprep/workflows/bold/base.py b/fmriprep/workflows/bold/base.py
@@ -328,6 +328,8 @@ def init_bold_wf(
         workflow.connect([
             (bold_fit_wf, ds_bold_native_wf, [
                 ('outputnode.bold_mask', 'inputnode.bold_mask'),
+                ('outputnode.motion_xfm', 'inputnode.motion_xfm'),
+                ('outputnode.boldref2fmap_xfm', 'inputnode.boldref2fmap_xfm'),
             ]),
             (bold_native_wf, ds_bold_native_wf, [
                 ('outputnode.bold_native', 'inputnode.bold'),

diff --git a/fmriprep/workflows/bold/fit.py b/fmriprep/workflows/bold/fit.py
@@ -378,6 +378,10 @@ def init_bold_fit_wf(
     # fmt:on
 
     # Stage 1: Generate motion correction boldref
+    hmc_boldref_source_buffer = pe.Node(
+        niu.IdentityInterface(fields=['in_file']),
+        name='hmc_boldref_source_buffer',
+    )
     if not have_hmcref:
         config.loggers.workflow.info('Stage 1: Adding HMC boldref workflow')
         hmc_boldref_wf = init_raw_boldref_wf(
@@ -395,7 +399,6 @@ def init_bold_fit_wf(
         )
         ds_hmc_boldref_wf.inputs.inputnode.source_files = [bold_file]
 
-        # fmt:off
         workflow.connect([
             (hmc_boldref_wf, hmcref_buffer, [
                 ('outputnode.bold_file', 'bold_file'),
@@ -407,8 +410,10 @@ def init_bold_fit_wf(
             (hmc_boldref_wf, func_fit_reports_wf, [
                 ('outputnode.validation_report', 'inputnode.validation_report'),
             ]),
-        ])
-        # fmt:on
+            (ds_hmc_boldref_wf, hmc_boldref_source_buffer, [
+                ('outputnode.boldref', 'in_file'),
+            ]),
+        ])  # fmt:skip
     else:
         config.loggers.workflow.info('Found HMC boldref - skipping Stage 1')
 
@@ -417,12 +422,11 @@ def init_bold_fit_wf(
 
         hmcref_buffer.inputs.boldref = precomputed['hmc_boldref']
 
-        # fmt:off
         workflow.connect([
             (validate_bold, hmcref_buffer, [('out_file', 'bold_file')]),
             (validate_bold, func_fit_reports_wf, [('out_report', 'inputnode.validation_report')]),
-        ])
-        # fmt:on
+            (hmcref_buffer, hmc_boldref_source_buffer, [('boldref', 'in_file')]),
+        ])  # fmt:skip
 
     # Stage 2: Estimate head motion
     if not hmc_xforms:
@@ -437,20 +441,18 @@ def init_bold_fit_wf(
         )
         ds_hmc_wf.inputs.inputnode.source_files = [bold_file]
 
-        # fmt:off
         workflow.connect([
             (hmcref_buffer, bold_hmc_wf, [
                 ('boldref', 'inputnode.raw_ref_image'),
                 ('bold_file', 'inputnode.bold_file'),
             ]),
             (bold_hmc_wf, ds_hmc_wf, [('outputnode.xforms', 'inputnode.xforms')]),
             (bold_hmc_wf, hmc_buffer, [
-                ('outputnode.xforms', 'hmc_xforms'),
                 ('outputnode.movpar_file', 'movpar_file'),
                 ('outputnode.rmsd_file', 'rmsd_file'),
             ]),
-        ])
-        # fmt:on
+            (ds_hmc_wf, hmc_buffer, [('outputnode.xforms', 'hmc_xforms')]),
+        ])  # fmt:skip
     else:
         config.loggers.workflow.info('Found motion correction transforms - skipping Stage 2')
         hmc_buffer.inputs.hmc_xforms = hmc_xforms
@@ -471,15 +473,15 @@ def init_bold_fit_wf(
             name='ds_coreg_boldref_wf',
         )
 
-        # fmt:off
         workflow.connect([
             (hmcref_buffer, fmapref_buffer, [('boldref', 'boldref_files')]),
             (fmapref_buffer, enhance_boldref_wf, [('out', 'inputnode.in_file')]),
-            (fmapref_buffer, ds_coreg_boldref_wf, [('out', 'inputnode.source_files')]),
+            (hmc_boldref_source_buffer, ds_coreg_boldref_wf, [
+                ('in_file', 'inputnode.source_files'),
+            ]),
             (ds_coreg_boldref_wf, regref_buffer, [('outputnode.boldref', 'boldref')]),
             (fmapref_buffer, func_fit_reports_wf, [('out', 'inputnode.sdc_boldref')]),
-        ])
-        # fmt:on
+        ])  # fmt:skip
 
         if fieldmap_id:
             fmap_select = pe.Node(