Skip to content

Commit

Permalink
ENH: Use BIDS URIs to track Sources in sidecars (#3255)
Browse files Browse the repository at this point in the history
Closes #3252. This is just a first pass: it only modifies the RawSources
for now. I can add the immediate Sources in another PR (this will require
a lot more effort).

## Changes proposed in this pull request

- Add `DatasetLinks` field to the `dataset_description.json`
    - The input dataset is called `raw`
- Any other datasets supplied through `--derivatives` are automatically
named `deriv-<index>`. We might want to support named derivatives at
some point.
- Replace BIDS-relative paths in `RawSources` fields with BIDS-URIs.
- Change `RawSources` to `Sources` in the sidecar files, since
`RawSources` is deprecated.

## Documentation that should be reviewed

I'll probably need to update the documentation, but haven't yet.

---------

Co-authored-by: Mathias Goncalves <[email protected]>
  • Loading branch information
tsalo and mgxd authored Mar 22, 2024
1 parent cdf7040 commit 6f8c965
Show file tree
Hide file tree
Showing 8 changed files with 303 additions and 37 deletions.
6 changes: 5 additions & 1 deletion fmriprep/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,11 @@ def main():
config.execution.run_uuid,
session_list=session_list,
)
write_derivative_description(config.execution.bids_dir, config.execution.fmriprep_dir)
write_derivative_description(
config.execution.bids_dir,
config.execution.fmriprep_dir,
dataset_links=config.execution.dataset_links,
)
write_bidsignore(config.execution.fmriprep_dir)

if failed_reports:
Expand Down
12 changes: 12 additions & 0 deletions fmriprep/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ def load(cls, settings, init=True, ignore=None):
if k in cls._paths:
if isinstance(v, list | tuple):
setattr(cls, k, [Path(val).absolute() for val in v])
elif isinstance(v, dict):
setattr(cls, k, {key: Path(val).absolute() for key, val in v.items()})
else:
setattr(cls, k, Path(v).absolute())
elif hasattr(cls, k):
Expand All @@ -251,6 +253,8 @@ def get(cls):
if k in cls._paths:
if isinstance(v, list | tuple):
v = [str(val) for val in v]
elif isinstance(v, dict):
v = {key: str(val) for key, val in v.items()}
else:
v = str(v)
if isinstance(v, SpatialReferences):
Expand Down Expand Up @@ -439,6 +443,8 @@ class execution(_Config):
"""Path to a working directory where intermediate results will be available."""
write_graph = False
"""Write out the computational graph corresponding to the planned preprocessing."""
dataset_links = {}
"""A dictionary of dataset links to be used to track Sources in sidecars."""

_layout = None

Expand All @@ -454,6 +460,7 @@ class execution(_Config):
'output_dir',
'templateflow_home',
'work_dir',
'dataset_links',
)

@classmethod
Expand Down Expand Up @@ -518,6 +525,11 @@ def _process_value(value):
for k, v in filters.items():
cls.bids_filters[acq][k] = _process_value(v)

dataset_links = {'raw': cls.bids_dir}
for i_deriv, deriv_path in enumerate(cls.derivatives):
dataset_links[f'deriv-{i_deriv}'] = deriv_path
cls.dataset_links = dataset_links

if 'all' in cls.debug:
cls.debug = list(DEBUG_MODES)

Expand Down
62 changes: 62 additions & 0 deletions fmriprep/interfaces/bids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""BIDS-related interfaces."""

from pathlib import Path

from bids.utils import listify
from nipype.interfaces.base import (
DynamicTraitedSpec,
SimpleInterface,
TraitedSpec,
isdefined,
traits,
)
from nipype.interfaces.io import add_traits
from nipype.interfaces.utility.base import _ravel

from ..utils.bids import _find_nearest_path


class _BIDSURIInputSpec(DynamicTraitedSpec):
    """Fixed inputs of the BIDSURI interface.

    The spec is dynamic, so additional input traits can be attached to it
    after construction.
    """

    dataset_links = traits.Dict(desc='Dataset links', mandatory=True)
    out_dir = traits.Str(desc='Output directory', mandatory=True)


class _BIDSURIOutputSpec(TraitedSpec):
    """Outputs of the BIDSURI interface: a single list of strings."""

    out = traits.List(traits.Str, desc='BIDS URI(s) for file')


class BIDSURI(SimpleInterface):
    """Convert input filenames to BIDS URIs, based on links in the dataset.

    This interface can combine multiple lists of inputs: the values given to
    ``in1`` ... ``in<numinputs>`` are flattened into one list before conversion.
    """

    input_spec = _BIDSURIInputSpec
    output_spec = _BIDSURIOutputSpec

    def __init__(self, numinputs=0, **inputs):
        """Create the interface and attach ``numinputs`` dynamic input traits.

        Parameters
        ----------
        numinputs : int
            Number of ``in<N>`` inputs (numbered from 1) to add to the input spec.
            No inputs are added when it is smaller than 1.
        **inputs
            Forwarded unchanged to the parent constructor.
        """
        super().__init__(**inputs)
        self._numinputs = numinputs
        trait_names = [f'in{idx + 1}' for idx in range(numinputs)] if numinputs >= 1 else []
        add_traits(self.inputs, trait_names)

    def _run_interface(self, runtime):
        """Flatten all ``in<N>`` inputs and store their converted form in ``out``."""
        collected = [getattr(self.inputs, f'in{idx + 1}') for idx in range(self._numinputs)]
        flat_files = _ravel(listify(collected))

        # Each dataset link name becomes a 'bids:<name>:' prefix ...
        uri_prefixes = {
            f'bids:{name}:': Path(root) for name, root in self.inputs.dataset_links.items()
        }
        # ... and the output directory itself gets the unnamed 'bids::' prefix.
        uri_prefixes['bids::'] = Path(self.inputs.out_dir)

        # Undefined inputs are dropped; every remaining entry is converted.
        self._results['out'] = [
            _find_nearest_path(uri_prefixes, item) for item in flat_files if isdefined(item)
        ]

        return runtime
72 changes: 72 additions & 0 deletions fmriprep/interfaces/tests/test_bids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Tests for fmriprep.interfaces.bids."""


def test_BIDSURI():
    """Run BIDSURI on string and list inputs and compare the resulting URIs."""
    from fmriprep.interfaces.bids import BIDSURI

    links = {
        'raw': '/data',
        'deriv-0': '/data/derivatives/source-1',
    }
    deriv_dir = '/data/derivatives/fmriprep'

    # Each scenario: (values assigned to the in<N> traits, expected ``out`` list)
    scenarios = [
        # A single element as a string
        (
            {'in1': '/data/sub-01/func/sub-01_task-rest_bold.nii.gz'},
            ['bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'],
        ),
        # A single element as a list
        (
            {'in1': ['/data/sub-01/func/sub-01_task-rest_bold.nii.gz']},
            ['bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'],
        ),
        # Two inputs: a string and a list
        (
            {
                'in1': '/data/sub-01/func/sub-01_task-rest_bold.nii.gz',
                'in2': [
                    '/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz',
                    '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',
                ],
            },
            [
                'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz',
                'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz',
                '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',  # No change
            ],
        ),
        # Two inputs as lists
        (
            {
                'in1': [
                    '/data/sub-01/func/sub-01_task-rest_bold.nii.gz',
                    'bids:raw:sub-01/func/sub-01_task-rest_boldref.nii.gz',
                ],
                'in2': [
                    '/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz',
                    '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',
                ],
            },
            [
                'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz',
                'bids:raw:sub-01/func/sub-01_task-rest_boldref.nii.gz',  # No change
                'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz',
                '/out/sub-01/func/sub-01_task-rest_bold.nii.gz',  # No change
            ],
        ),
    ]

    for given, expected in scenarios:
        interface = BIDSURI(
            numinputs=len(given),
            dataset_links=links,
            out_dir=deriv_dir,
        )
        for trait_name, value in given.items():
            setattr(interface.inputs, trait_name, value)
        results = interface.run()
        assert results.outputs.out == expected
71 changes: 70 additions & 1 deletion fmriprep/utils/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def write_bidsignore(deriv_dir):
ignore_file.write_text('\n'.join(bids_ignore) + '\n')


def write_derivative_description(bids_dir, deriv_dir):
def write_derivative_description(bids_dir, deriv_dir, dataset_links=None):
from .. import __version__

DOWNLOAD_URL = f'https://github.com/nipreps/fmriprep/archive/{__version__}.tar.gz'
Expand Down Expand Up @@ -145,6 +145,10 @@ def write_derivative_description(bids_dir, deriv_dir):
if 'License' in orig_desc:
desc['License'] = orig_desc['License']

# Add DatasetLinks
if dataset_links:
desc['DatasetLinks'] = {k: str(v) for k, v in dataset_links.items()}

Path.write_text(deriv_dir / 'dataset_description.json', json.dumps(desc, indent=4))


Expand Down Expand Up @@ -343,3 +347,68 @@ def dismiss_echo(entities=None):
entities.append('echo')

return entities


def _find_nearest_path(path_dict, input_path):
    """Find the nearest relative path from an input path to a dictionary of paths.

    If ``input_path`` is not relative to any of the paths in ``path_dict``,
    the absolute path string is returned.

    If ``input_path`` is already a BIDS-URI, then it will be returned unmodified.

    Parameters
    ----------
    path_dict : dict of (str, Path)
        A dictionary of paths.
    input_path : Path
        The input path to match.

    Returns
    -------
    matching_path : str
        The nearest relative path from the input path to a path in the dictionary.
        This is either the concatenation of the associated key from ``path_dict``
        and the relative path from the associated value from ``path_dict`` to ``input_path``,
        or the absolute path to ``input_path`` if no matching path is found from ``path_dict``.
        The relative part always uses forward slashes.

    Examples
    --------
    >>> from pathlib import Path
    >>> path_dict = {
    ...     'bids::': Path('/data/derivatives/fmriprep'),
    ...     'bids:raw:': Path('/data'),
    ...     'bids:deriv-0:': Path('/data/derivatives/source-1'),
    ... }
    >>> input_path = Path('/data/derivatives/source-1/sub-01/func/sub-01_task-rest_bold.nii.gz')
    >>> _find_nearest_path(path_dict, input_path)  # match to 'bids:deriv-0:'
    'bids:deriv-0:sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> input_path = Path('/out/sub-01/func/sub-01_task-rest_bold.nii.gz')
    >>> _find_nearest_path(path_dict, input_path)  # no match- absolute path
    '/out/sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> input_path = Path('/data/sub-01/func/sub-01_task-rest_bold.nii.gz')
    >>> _find_nearest_path(path_dict, input_path)  # match to 'bids:raw:'
    'bids:raw:sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> input_path = 'bids::sub-01/func/sub-01_task-rest_bold.nii.gz'
    >>> _find_nearest_path(path_dict, input_path)  # already a BIDS-URI
    'bids::sub-01/func/sub-01_task-rest_bold.nii.gz'

    """
    # Don't modify BIDS-URIs
    if isinstance(input_path, str) and input_path.startswith('bids:'):
        return input_path

    input_path = Path(input_path)
    matching_key = None
    matching_path = None
    for key, path in path_dict.items():
        if not input_path.is_relative_to(path):
            continue

        relative_path = input_path.relative_to(path)
        # Fewer remaining parts means ``path`` is the deepest (nearest) ancestor so far;
        # ties keep the first candidate encountered.
        if matching_path is None or len(relative_path.parts) < len(matching_path.parts):
            matching_key = key
            matching_path = relative_path

    if matching_path is None:
        # Not inside any known dataset: fall back to the plain absolute path
        return str(input_path.absolute())

    # URIs use forward slashes, so do not rely on the OS-specific str() of the path
    return f'{matching_key}{matching_path.as_posix()}'
2 changes: 2 additions & 0 deletions fmriprep/workflows/bold/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,8 @@ def init_bold_wf(
workflow.connect([
(bold_fit_wf, ds_bold_native_wf, [
('outputnode.bold_mask', 'inputnode.bold_mask'),
('outputnode.motion_xfm', 'inputnode.motion_xfm'),
('outputnode.boldref2fmap_xfm', 'inputnode.boldref2fmap_xfm'),
]),
(bold_native_wf, ds_bold_native_wf, [
('outputnode.bold_native', 'inputnode.bold'),
Expand Down
30 changes: 16 additions & 14 deletions fmriprep/workflows/bold/fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ def init_bold_fit_wf(
# fmt:on

# Stage 1: Generate motion correction boldref
hmc_boldref_source_buffer = pe.Node(
niu.IdentityInterface(fields=['in_file']),
name='hmc_boldref_source_buffer',
)
if not have_hmcref:
config.loggers.workflow.info('Stage 1: Adding HMC boldref workflow')
hmc_boldref_wf = init_raw_boldref_wf(
Expand All @@ -395,7 +399,6 @@ def init_bold_fit_wf(
)
ds_hmc_boldref_wf.inputs.inputnode.source_files = [bold_file]

# fmt:off
workflow.connect([
(hmc_boldref_wf, hmcref_buffer, [
('outputnode.bold_file', 'bold_file'),
Expand All @@ -407,8 +410,10 @@ def init_bold_fit_wf(
(hmc_boldref_wf, func_fit_reports_wf, [
('outputnode.validation_report', 'inputnode.validation_report'),
]),
])
# fmt:on
(ds_hmc_boldref_wf, hmc_boldref_source_buffer, [
('outputnode.boldref', 'in_file'),
]),
]) # fmt:skip
else:
config.loggers.workflow.info('Found HMC boldref - skipping Stage 1')

Expand All @@ -417,12 +422,11 @@ def init_bold_fit_wf(

hmcref_buffer.inputs.boldref = precomputed['hmc_boldref']

# fmt:off
workflow.connect([
(validate_bold, hmcref_buffer, [('out_file', 'bold_file')]),
(validate_bold, func_fit_reports_wf, [('out_report', 'inputnode.validation_report')]),
])
# fmt:on
(hmcref_buffer, hmc_boldref_source_buffer, [('boldref', 'in_file')]),
]) # fmt:skip

# Stage 2: Estimate head motion
if not hmc_xforms:
Expand All @@ -437,20 +441,18 @@ def init_bold_fit_wf(
)
ds_hmc_wf.inputs.inputnode.source_files = [bold_file]

# fmt:off
workflow.connect([
(hmcref_buffer, bold_hmc_wf, [
('boldref', 'inputnode.raw_ref_image'),
('bold_file', 'inputnode.bold_file'),
]),
(bold_hmc_wf, ds_hmc_wf, [('outputnode.xforms', 'inputnode.xforms')]),
(bold_hmc_wf, hmc_buffer, [
('outputnode.xforms', 'hmc_xforms'),
('outputnode.movpar_file', 'movpar_file'),
('outputnode.rmsd_file', 'rmsd_file'),
]),
])
# fmt:on
(ds_hmc_wf, hmc_buffer, [('outputnode.xforms', 'hmc_xforms')]),
]) # fmt:skip
else:
config.loggers.workflow.info('Found motion correction transforms - skipping Stage 2')
hmc_buffer.inputs.hmc_xforms = hmc_xforms
Expand All @@ -471,15 +473,15 @@ def init_bold_fit_wf(
name='ds_coreg_boldref_wf',
)

# fmt:off
workflow.connect([
(hmcref_buffer, fmapref_buffer, [('boldref', 'boldref_files')]),
(fmapref_buffer, enhance_boldref_wf, [('out', 'inputnode.in_file')]),
(fmapref_buffer, ds_coreg_boldref_wf, [('out', 'inputnode.source_files')]),
(hmc_boldref_source_buffer, ds_coreg_boldref_wf, [
('in_file', 'inputnode.source_files'),
]),
(ds_coreg_boldref_wf, regref_buffer, [('outputnode.boldref', 'boldref')]),
(fmapref_buffer, func_fit_reports_wf, [('out', 'inputnode.sdc_boldref')]),
])
# fmt:on
]) # fmt:skip

if fieldmap_id:
fmap_select = pe.Node(
Expand Down
Loading

0 comments on commit 6f8c965

Please sign in to comment.