From cd665b141f503bed47199ccc4e7ac181f378f78f Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Thu, 15 Dec 2022 17:34:56 +0100
Subject: [PATCH 1/3] enh: create new filtering submodule

---
 src/eddymotion/data/filtering.py | 64 ++++++++++++++++++++++++++++++++
 src/eddymotion/estimator.py      | 45 ++--------------------
 2 files changed, 68 insertions(+), 41 deletions(-)
 create mode 100644 src/eddymotion/data/filtering.py

diff --git a/src/eddymotion/data/filtering.py b/src/eddymotion/data/filtering.py
new file mode 100644
index 00000000..bc74c4d8
--- /dev/null
+++ b/src/eddymotion/data/filtering.py
@@ -0,0 +1,64 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
+#
+# Copyright 2022 The NiPreps Developers <nipreps@gmail.com>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# We support and encourage derived works from this project, please read
+# about our expectations at
+#
+#     https://www.nipreps.org/community/licensing/
+#
+"""Filtering data."""
+
+
+def advanced_clip(
+    data, p_min=35, p_max=99.98, nonnegative=True, dtype="int16", invert=False
+):
+    """
+    Remove outliers at both ends of the intensity distribution and fit into a given dtype.
+
+    This interface tries to emulate ANTs workflows' massaging that truncate images into
+    the 0-255 range, and applies percentiles for clipping images.
+    For image registration, normalizing the intensity into a compact range (e.g., uint8)
+    is generally advised.
+
+    To more robustly determine the clipping thresholds, spikes are removed from data with
+    a median filter.
+    Once the thresholds are calculated, the denoised data are thrown away and the thresholds
+    are applied on the original image.
+
+    """
+    import numpy as np
+    from scipy import ndimage
+    from skimage.morphology import ball
+
+    # Calculate stats on denoised version, to preempt outliers from biasing
+    denoised = ndimage.median_filter(data, footprint=ball(3))
+
+    a_min = np.percentile(denoised[denoised >= 0] if nonnegative else denoised, p_min)
+    a_max = np.percentile(denoised[denoised >= 0] if nonnegative else denoised, p_max)
+
+    # Clip and cast
+    data = np.clip(data, a_min=a_min, a_max=a_max)
+    data -= data.min()
+    data /= data.max()
+
+    if invert:
+        data = 1.0 - data
+
+    if dtype in ("uint8", "int16"):
+        data = np.round(255 * data).astype(dtype)
+
+    return data
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index 89a77ef3..da8f699e 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -198,49 +198,12 @@ def estimate(
         return dwdata.em_affines
 
 
-def _advanced_clip(data, p_min=35, p_max=99.98, nonnegative=True, dtype="int16", invert=False):
-    """
-    Remove outliers at both ends of the intensity distribution and fit into a given dtype.
-
-    This interface tries to emulate ANTs workflows' massaging that truncate images into
-    the 0-255 range, and applies percentiles for clipping images.
-    For image registration, normalizing the intensity into a compact range (e.g., uint8)
-    is generally advised.
-
-    To more robustly determine the clipping thresholds, spikes are removed from data with
-    a median filter.
-    Once the thresholds are calculated, the denoised data are thrown away and the thresholds
-    are applied on the original image.
-
-    """
-    import numpy as np
-    from scipy import ndimage
-    from skimage.morphology import ball
-
-    # Calculate stats on denoised version, to preempt outliers from biasing
-    denoised = ndimage.median_filter(data, footprint=ball(3))
-
-    a_min = np.percentile(denoised[denoised >= 0] if nonnegative else denoised, p_min)
-    a_max = np.percentile(denoised[denoised >= 0] if nonnegative else denoised, p_max)
-
-    # Clip and cast
-    data = np.clip(data, a_min=a_min, a_max=a_max)
-    data -= data.min()
-    data /= data.max()
-
-    if invert:
-        data = 1.0 - data
-
-    if dtype in ("uint8", "int16"):
-        data = np.round(255 * data).astype(dtype)
-
-    return data
-
-
 def _to_nifti(data, affine, filename, clip=True):
     data = np.squeeze(data)
     if clip:
-        data = _advanced_clip(data)
+        from eddymotion.data.filtering import advanced_clip
+
+        data = advanced_clip(data)
     nii = nb.Nifti1Image(
         data,
         affine,
@@ -293,7 +256,7 @@ def _prepare_kwargs(dwdata, kwargs):
         kwargs["mask"] = dwdata.brainmask
 
     if hasattr(dwdata, "bzero") and dwdata.bzero is not None:
-        kwargs["S0"] = _advanced_clip(dwdata.bzero)
+        kwargs["S0"] = dwdata.bzero
 
     if hasattr(dwdata, "gradients"):
         kwargs["gtab"] = dwdata.gradients

From b61085324d477d156edc7e2bc1c73f6ea35816af Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Fri, 16 Dec 2022 17:06:08 +0100
Subject: [PATCH 2/3] enh: add implementation of smoothing and decimating

---
 src/eddymotion/data/filtering.py | 87 ++++++++++++++++++++++++++++++++
 src/eddymotion/estimator.py      |  3 +-
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/src/eddymotion/data/filtering.py b/src/eddymotion/data/filtering.py
index bc74c4d8..3e08bb3d 100644
--- a/src/eddymotion/data/filtering.py
+++ b/src/eddymotion/data/filtering.py
@@ -23,6 +23,93 @@
 """Filtering data."""
 
 
+def gaussian_filter(data, vox_width):
+    """
+    Apply a Gaussian smoothing filter of a given width (in voxels)
+
+    Parameters
+    ----------
+    data : :obj:`numpy.ndarray`
+        The input image's data array
+    vox_width : :obj:`numbers.Number` or :obj:`tuple` or :obj:`list`
+        The smoothing kernel width in voxels
+
+    Returns
+    -------
+    data : :obj:`numpy.ndarray`
+        The smoothed dataset
+
+    """
+    from numbers import Number
+    import numpy as np
+    from scipy.ndimage import gaussian_filter as _gs
+
+    data = np.squeeze(data)  # drop unused dimensions
+    ndim = data.ndim
+
+    if isinstance(vox_width, Number):
+        vox_width = tuple([vox_width] * min(3, ndim))
+
+    # Do not smooth across time/orientation
+    if ndim == 4 and len(vox_width) == 3:
+        vox_width = (*vox_width, 0)
+
+    return _gs(data, vox_width)
+
+
+def decimate(in_file, factor, smooth=True, order=3, nonnegative=True):
+    from numbers import Number
+    import numpy as np
+    from scipy.ndimage import map_coordinates
+    import nibabel as nb
+
+    imnii = nb.load(in_file)
+    data = np.squeeze(imnii.get_fdata())
+    datashape = data.shape
+    ndim = data.ndim
+
+    if isinstance(factor, Number):
+        factor = tuple([factor] * min(3, ndim))
+
+    if ndim == 4 and len(factor) == 3:
+        factor = (*factor, 0)
+
+    if smooth:
+        if smooth is True:
+            smooth = factor
+
+        data = gaussian_filter(data, smooth)
+
+    down_grid = np.array(
+        np.meshgrid(
+            *[np.arange(_s, step=int(_f) or 1) for _s, _f in zip(datashape, factor)],
+            indexing="ij",
+        )
+    )
+    new_shape = down_grid.shape[1:]
+    newaffine = imnii.affine.copy()
+    newaffine[:3, :3] = np.array(factor[:3]) * newaffine[:3, :3]
+    # newaffine[:3, 3] += imnii.affine[:3, :3] @ (0.5 / np.array(factor[:3], dtype="float32"))
+
+    # Resample data in the new grid
+    resampled = map_coordinates(
+        data,
+        down_grid.reshape((ndim, np.prod(new_shape))),
+        order=order,
+        mode="constant",
+        cval=0,
+        prefilter=True,
+    ).reshape(new_shape)
+
+    if order > 2 and nonnegative:
+        resampled[resampled < 0] = 0
+
+    newnii = nb.Nifti1Image(resampled, newaffine, imnii.header)
+    newnii.set_sform(newaffine, code=1)
+    newnii.set_qform(newaffine, code=1)
+    return newnii
+
+
 def advanced_clip(
     data, p_min=35, p_max=99.98, nonnegative=True, dtype="int16", invert=False
 ):
diff --git a/src/eddymotion/estimator.py b/src/eddymotion/estimator.py
index da8f699e..5c28f113 100644
--- a/src/eddymotion/estimator.py
+++ b/src/eddymotion/estimator.py
@@ -251,12 +251,13 @@ def _prepare_kwargs(dwdata, kwargs):
     kwargs : :obj:`dict`
         Keyword arguments.
     """
+    from eddymotion.data.filtering import advanced_clip as _advanced_clip
 
     if dwdata.brainmask is not None:
         kwargs["mask"] = dwdata.brainmask
 
     if hasattr(dwdata, "bzero") and dwdata.bzero is not None:
-        kwargs["S0"] = dwdata.bzero
+        kwargs["S0"] = _advanced_clip(dwdata.bzero)
 
     if hasattr(dwdata, "gradients"):
         kwargs["gtab"] = dwdata.gradients

From 897d634bf4d7de0c5b743f4aa962df4332e74eb3 Mon Sep 17 00:00:00 2001
From: Oscar Esteban <code@oscaresteban.es>
Date: Wed, 10 Apr 2024 18:20:19 +0200
Subject: [PATCH 3/3] sty: add type hints and improve docstrings

---
 src/eddymotion/data/filtering.py | 166 +++++++++++++++++++++++--------
 1 file changed, 125 insertions(+), 41 deletions(-)

diff --git a/src/eddymotion/data/filtering.py b/src/eddymotion/data/filtering.py
index 3e08bb3d..6c14e3d4 100644
--- a/src/eddymotion/data/filtering.py
+++ b/src/eddymotion/data/filtering.py
@@ -22,64 +22,123 @@
 #
 """Filtering data."""
 
+from __future__ import annotations
 
-def gaussian_filter(data, vox_width):
+from numbers import Number
+
+import numpy as np
+from nibabel import Nifti1Image, load
+from scipy.ndimage import gaussian_filter as _gs
+from scipy.ndimage import map_coordinates, median_filter
+from skimage.morphology import ball
+
+
+def gaussian_filter(
+    data: np.ndarray,
+    vox_width: float | tuple[float, float, float],
+) -> np.ndarray:
     """
-    Apply a Gaussian smoothing filter of a given width (in voxels)
+    Applies a Gaussian smoothing filter to a n-dimensional array.
+
+    This function smooths the input data using a Gaussian filter with a specified
+    width (sigma) in voxels along each relevant dimension. It automatically
+    handles different data dimensionalities (2D, 3D, or 4D) and ensures that
+    smoothing is not applied along the time or orientation dimension (if present
+    in 4D data).
 
     Parameters
     ----------
-    data : :obj:`numpy.ndarray`
-        The input image's data array
-    vox_width : :obj:`numbers.Number` or :obj:`tuple` or :obj:`list`
-        The smoothing kernel width in voxels
+    data : :obj:`~numpy.ndarray`
+        The input data array.
+    vox_width : :obj:`float` or :obj:`tuple` of three :obj:`float`
+        The smoothing kernel width (sigma) in voxels. If a single :obj:`float` is provided,
+        it is applied uniformly across all spatial dimensions. Alternatively, a
+        tuple of three floats can be provided to specify different sigma values
+        for each spatial dimension (x, y, z).
 
     Returns
     -------
-    data : :obj:`numpy.ndarray`
-        The smoothed dataset
+    :obj:`~numpy.ndarray`
+        The smoothed data array.
 
     """
-    from numbers import Number
-    import numpy as np
-    from scipy.ndimage import gaussian_filter as _gs
 
-    data = np.squeeze(data)  # drop unused dimensions
+    data = np.squeeze(data)  # Drop unused dimensions
     ndim = data.ndim
 
     if isinstance(vox_width, Number):
         vox_width = tuple([vox_width] * min(3, ndim))
 
-    # Do not smooth across time/orientation
+    # Do not smooth across time/orientation (if present in 4D data)
     if ndim == 4 and len(vox_width) == 3:
         vox_width = (*vox_width, 0)
 
     return _gs(data, vox_width)
 
 
-def decimate(in_file, factor, smooth=True, order=3, nonnegative=True):
-    from numbers import Number
-    import numpy as np
-    from scipy.ndimage import map_coordinates
-    import nibabel as nb
+def decimate(
+    in_file: str,
+    factor: int | tuple[int, int, int],
+    smooth: bool | tuple[int, int, int] = True,
+    order: int = 3,
+    nonnegative: bool = True,
+) -> Nifti1Image:
+    """
+    Decimates a 3D or 4D Nifti image by a specified downsampling factor.
+
+    This function downsamples a Nifti image by averaging voxels within a user-defined
+    factor in each spatial dimension. It optionally applies Gaussian smoothing
+    before downsampling to reduce aliasing artifacts. The function also handles
+    updating the affine transformation matrix to reflect the change in voxel size.
+
+    Parameters
+    ----------
+    in_file : :obj:`str`
+        Path to the input NIfTI image file.
+    factor : :obj:`int` or :obj:`tuple`
+        The downsampling factor. If a single integer is provided, it is applied
+        uniformly across all spatial dimensions. Alternatively, a tuple of three
+        integers can be provided to specify different downsampling factors for each
+        spatial dimension (x, y, z). Values must be greater than 0.
+    smooth : :obj:`bool` or :obj:`tuple`, optional (default=``True``)
+        Controls application of Gaussian smoothing before downsampling. If True,
+        a smoothing kernel size equal to the downsampling factor is applied.
+        Alternatively, a tuple of three integers can be provided to specify
+        different smoothing kernel sizes for each spatial dimension. Setting to
+        False disables smoothing.
+    order : :obj:`int`, optional (default=3)
+        The order of the spline interpolation used for downsampling. Higher
+        orders provide smoother results but are computationally more expensive.
+    nonnegative : :obj:`bool`, optional (default=``True``)
+        If True, negative values in the downsampled data are set to zero.
+
+    Returns
+    -------
+    :obj:`~nibabel.Nifti1Image`
+        The downsampled NIfTI image object.
 
-    imnii = nb.load(in_file)
-    data = np.squeeze(imnii.get_fdata())
+    """
+
+    imnii = load(in_file)
+    data = np.squeeze(imnii.get_fdata())  # Remove unused dimensions
     datashape = data.shape
     ndim = data.ndim
 
     if isinstance(factor, Number):
         factor = tuple([factor] * min(3, ndim))
 
+    if any(f <= 0 for f in factor[:3]):
+        raise ValueError("All spatial downsampling factors must be positive.")
+
     if ndim == 4 and len(factor) == 3:
         factor = (*factor, 0)
 
     if smooth:
         if smooth is True:
             smooth = factor
-
         data = gaussian_filter(data, smooth)
 
+    # Create downsampled grid
     down_grid = np.array(
         np.meshgrid(
             *[np.arange(_s, step=int(_f) or 1) for _s, _f in zip(datashape, factor)],
@@ -87,11 +146,12 @@ def decimate(in_file, factor, smooth=True, order=3, nonnegative=True):
         )
     )
     new_shape = down_grid.shape[1:]
+
+    # Update affine transformation
     newaffine = imnii.affine.copy()
     newaffine[:3, :3] = np.array(factor[:3]) * newaffine[:3, :3]
-    # newaffine[:3, 3] += imnii.affine[:3, :3] @ (0.5 / np.array(factor[:3], dtype="float32"))
 
-    # Resample data in the new grid
+    # Resample data on the new grid
     resampled = map_coordinates(
         data,
         down_grid.reshape((ndim, np.prod(new_shape))),
@@ -101,43 +161,67 @@ def decimate(in_file, factor, smooth=True, order=3, nonnegative=True):
         prefilter=True,
     ).reshape(new_shape)
 
+    # Set negative values to zero (optional)
     if order > 2 and nonnegative:
         resampled[resampled < 0] = 0
 
-    newnii = nb.Nifti1Image(resampled, newaffine, imnii.header)
+    # Create new Nifti image with updated information
+    newnii = Nifti1Image(resampled, newaffine, imnii.header)
     newnii.set_sform(newaffine, code=1)
     newnii.set_qform(newaffine, code=1)
+
     return newnii
 
 
 def advanced_clip(
-    data, p_min=35, p_max=99.98, nonnegative=True, dtype="int16", invert=False
-):
+    data: np.ndarray,
+    p_min: float = 35,
+    p_max: float = 99.98,
+    nonnegative: bool = True,
+    dtype: str | np.dtype = "int16",
+    invert: bool = False,
+) -> np.ndarray:
     """
-    Remove outliers at both ends of the intensity distribution and fit into a given dtype.
+    Clips outliers from a n-dimensional array and scales/casts to a specified data type.
 
-    This interface tries to emulate ANTs workflows' massaging that truncate images into
-    the 0-255 range, and applies percentiles for clipping images.
-    For image registration, normalizing the intensity into a compact range (e.g., uint8)
-    is generally advised.
+    This function removes outliers from both ends of the intensity distribution
+    in a n-dimensional array using percentiles. It optionally enforces non-negative
+    values and scales the data to fit within a specified data type (e.g., uint8
+    for image registration). To remove outliers more robustly, the function
+    first applies a median filter to the data before calculating clipping thresholds.
 
-    To more robustly determine the clipping thresholds, spikes are removed from data with
-    a median filter.
-    Once the thresholds are calculated, the denoised data are thrown away and the thresholds
-    are applied on the original image.
+    Parameters
+    ----------
+    data : :obj:`~numpy.ndarray`
+        The input n-dimensional data array.
+    p_min : :obj:`float`, optional (default=35)
+        The lower percentile threshold for clipping. Values below this percentile
+        are set to the threshold value.
+    p_max : :obj:`float`, optional (default=99.98)
+        The upper percentile threshold for clipping. Values above this percentile
+        are set to the threshold value.
+    nonnegative : :obj:`bool`, optional (default=``True``)
+        If True, only consider non-negative values when calculating thresholds.
+    dtype : :obj:`str` or :obj:`~numpy.dtype`, optional (default=``"int16"``)
+        The desired data type for the output array. Supported types are "uint8"
+        and "int16".
+    invert : :obj:`bool`, optional (default=``False``)
+        If True, inverts the intensity values after scaling (1.0 - data).
+
+    Returns
+    -------
+    :obj:`~numpy.ndarray`
+        The clipped and scaled data array with the specified data type.
 
     """
-    import numpy as np
-    from scipy import ndimage
-    from skimage.morphology import ball
 
-    # Calculate stats on denoised version, to preempt outliers from biasing
-    denoised = ndimage.median_filter(data, footprint=ball(3))
+    # Calculate stats on denoised version to avoid outlier bias
+    denoised = median_filter(data, footprint=ball(3))
 
     a_min = np.percentile(denoised[denoised >= 0] if nonnegative else denoised, p_min)
     a_max = np.percentile(denoised[denoised >= 0] if nonnegative else denoised, p_max)
 
-    # Clip and cast
+    # Clip and scale data
     data = np.clip(data, a_min=a_min, a_max=a_max)
     data -= data.min()
     data /= data.max()