From 9ec6fa7afe3021768c615226488c598c4bc29ee6 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 10:48:16 -0400
Subject: [PATCH 01/14] Try Lomb-Scargle-based interpolation.

---
 xcp_d/utils/utils.py | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index 6b8aab7fb..c6769a20c 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -482,7 +482,7 @@ def denoise_with_nilearn(
 
 
 def _interpolate(*, arr, sample_mask, TR):
-    """Replace high-motion volumes with cubic-spline interpolated values.
+    """Replace high-motion volumes with a Lomb-Scargle periodogram-based interpolation method.
 
     This function applies Nilearn's :func:`~nilearn.signal._interpolate_volumes` function,
     followed by an extra step that replaces extrapolated, high-motion values at the beginning and
@@ -510,13 +510,40 @@ def _interpolate(*, arr, sample_mask, TR):
     outlier_idx = list(np.where(~sample_mask)[0])
     n_volumes = arr.shape[0]
 
+    fs = 1 / TR
+    time = np.arange(0, n_volumes * TR, TR)
+    censored_time = time[sample_mask]
+    n_voxels = arr.shape[1]
+
+    frequencies_hz = np.linspace(0, 0.5 * fs, (n_volumes // 2) + 1)[1:]
+    angular_frequencies = 2 * np.pi * frequencies_hz
+
+    interpolated_arr = arr.copy()
+    for i_voxel in range(n_voxels):
+        voxel_data = arr[:, i_voxel]
+        # z-score the data so Lomb-Scargle will work
+        voxel_mean = np.mean(voxel_data)
+        voxel_sd = np.std(voxel_data)
+        voxel_data = voxel_data - voxel_mean
+        voxel_data = voxel_data / voxel_sd
+
+        # Use Lomb-Scargle periodogram to estimate the power spectrum using the low-motion volumes
+        censored_voxel_data = voxel_data[sample_mask]
+        power_spectrum = signal.lombscargle(
+            censored_time,
+            censored_voxel_data,
+            angular_frequencies,
+            normalize=True,
+        )
+        interpolated_voxel_data = sum(
+            pow * np.sin(hz * time) for hz, pow in zip(angular_frequencies, power_spectrum)
+        )
+        # Rescale the interpolated data back to the original scale
+        interpolated_voxel_data = interpolated_voxel_data * voxel_sd + voxel_mean
+        # Replace low-motion volumes with the original data
+        interpolated_voxel_data[sample_mask] = arr[sample_mask, i_voxel]
+        interpolated_arr[:, i_voxel] = interpolated_voxel_data
 
-    interpolated_arr = signal._interpolate_volumes(
-        arr,
-        sample_mask=sample_mask,
-        t_r=TR,
-        extrapolate=True,
-    )
     # Replace any high-motion volumes at the beginning or end of the run with the closest
     # low-motion volume's data.
     # Use https://stackoverflow.com/a/48106843/2589328 to group consecutive blocks of outliers.

From 25b5c5de99db7d1c4ddf2355bc4fed9a0685d88f Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 11:07:34 -0400
Subject: [PATCH 02/14] Fix import.

---
 xcp_d/utils/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index c6769a20c..37ae84652 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -506,7 +506,7 @@ def _interpolate(*, arr, sample_mask, TR):
     -----
     This function won't work if sample_mask is all zeros, but that should never happen.
     """
-    from nilearn import signal
+    from scipy import signal
 
     outlier_idx = list(np.where(~sample_mask)[0])
     n_volumes = arr.shape[0]

From d36cc5692684048df6e1cefac8c84076ce3b0984 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 11:22:55 -0400
Subject: [PATCH 03/14] Scale the reconstructed data.
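scipy's normalized Lomb-Scargle periodogram returns power rather than
amplitude, so take the square root of each power estimate before
resynthesizing, then fit a single gain by least squares to put the
reconstruction back on the scale of the censored data. A minimal sketch of
that gain fit on toy data (observed and recon are illustrative names, not
code from this repository):

    import numpy as np

    rng = np.random.default_rng(0)
    observed = rng.standard_normal(50)  # censored, z-scored voxel values
    recon = 0.4 * observed + 0.1 * rng.standard_normal(50)  # stand-in reconstruction

    # Solve min_g || g * recon - observed ||^2 for a single scalar gain g.
    gain = np.linalg.lstsq(recon[:, np.newaxis], observed, rcond=None)[0][0]
    rescaled = gain * recon

In principle the sqrt-of-power spectrum should already be on the right
scale; empirically the fitted gain still helps, hence the XXX note in the
diff below.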
---
 xcp_d/utils/utils.py | 36 ++++++++++--------------------------
 1 file changed, 10 insertions(+), 26 deletions(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index 37ae84652..16f08d147 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -508,7 +508,6 @@ def _interpolate(*, arr, sample_mask, TR):
     """
     from scipy import signal
 
-    outlier_idx = list(np.where(~sample_mask)[0])
     n_volumes = arr.shape[0]
     fs = 1 / TR
     time = np.arange(0, n_volumes * TR, TR)
@@ -535,40 +534,25 @@ def _interpolate(*, arr, sample_mask, TR):
             angular_frequencies,
             normalize=True,
         )
+        power_spectrum = np.sqrt(power_spectrum)
         interpolated_voxel_data = sum(
             pow * np.sin(hz * time) for hz, pow in zip(angular_frequencies, power_spectrum)
         )
+        # Use least squares to estimate the scaling factor
+        # XXX: AFAICT we shouldn't need a scaling factor, but this does help.
+        beta = np.linalg.lstsq(
+            interpolated_voxel_data[sample_mask, np.newaxis],
+            censored_voxel_data,
+            rcond=None,
+        )[0]
+        interpolated_voxel_data *= beta[0]
+
         # Rescale the interpolated data back to the original scale
         interpolated_voxel_data = interpolated_voxel_data * voxel_sd + voxel_mean
         # Replace low-motion volumes with the original data
         interpolated_voxel_data[sample_mask] = arr[sample_mask, i_voxel]
         interpolated_arr[:, i_voxel] = interpolated_voxel_data
 
-    # Replace any high-motion volumes at the beginning or end of the run with the closest
-    # low-motion volume's data.
-    # Use https://stackoverflow.com/a/48106843/2589328 to group consecutive blocks of outliers.
-    gaps = [[start, end] for start, end in zip(outlier_idx, outlier_idx[1:]) if start + 1 < end]
-    edges = iter(outlier_idx[:1] + sum(gaps, []) + outlier_idx[-1:])
-    consecutive_outliers_idx = list(zip(edges, edges))
-    first_outliers = consecutive_outliers_idx[0]
-    last_outliers = consecutive_outliers_idx[-1]
-
-    # Replace outliers at beginning of run
-    if first_outliers[0] == 0:
-        LOGGER.warning(
-            f"Outlier volumes at beginning of run ({first_outliers[0]}-{first_outliers[1]}) "
-            "will be replaced with first non-outlier volume's values."
-        )
-        interpolated_arr[: first_outliers[1] + 1, :] = interpolated_arr[first_outliers[1] + 1, :]
-
-    # Replace outliers at end of run
-    if last_outliers[1] == n_volumes - 1:
-        LOGGER.warning(
-            f"Outlier volumes at end of run ({last_outliers[0]}-{last_outliers[1]}) "
-            "will be replaced with last non-outlier volume's values."
-        )
-        interpolated_arr[last_outliers[0] :, :] = interpolated_arr[last_outliers[0] - 1, :]
-
     return interpolated_arr

From b75c16a611ace5c59025c70ef729ef0510690274 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 11:39:35 -0400
Subject: [PATCH 04/14] Update utils.py

---
 xcp_d/utils/utils.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index 16f08d147..b85d38748 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -540,11 +540,16 @@ def _interpolate(*, arr, sample_mask, TR):
         )
         # Use least squares to estimate the scaling factor
         # XXX: AFAICT we shouldn't need a scaling factor, but this does help.
-        beta = np.linalg.lstsq(
-            interpolated_voxel_data[sample_mask, np.newaxis],
-            censored_voxel_data,
-            rcond=None,
-        )[0]
+        try:
+            beta = np.linalg.lstsq(
+                interpolated_voxel_data[sample_mask, np.newaxis],
+                censored_voxel_data,
+                rcond=None,
+            )[0]
+        except np.linalg.LinAlgError:
+            raise ValueError(
+                f"lstsq failed:\n\n{censored_voxel_data}\n\n{interpolated_voxel_data[sample_mask]}"
+            )
         interpolated_voxel_data *= beta[0]
 
         # Rescale the interpolated data back to the original scale

From ac2c40d6615e47b71d53071bf488bec25ec809c2 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 12:40:24 -0400
Subject: [PATCH 05/14] Use Power's method.

---
 xcp_d/utils/utils.py | 136 ++++++++++++++++++++++++++++++-------------
 1 file changed, 97 insertions(+), 39 deletions(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index b85d38748..d7497850e 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -506,61 +506,119 @@ def _interpolate(*, arr, sample_mask, TR):
     -----
     This function won't work if sample_mask is all zeros, but that should never happen.
     """
-    from scipy import signal
+    from xcp_d.utils.utils import get_transform
 
     n_volumes = arr.shape[0]
     time = np.arange(0, n_volumes * TR, TR)
     censored_time = time[sample_mask]
     n_voxels = arr.shape[1]
 
     interpolated_arr = arr.copy()
     for i_voxel in range(n_voxels):
         voxel_data = arr[:, i_voxel]
+        interpolated_voxel_data = get_transform(
+            censored_time,
+            voxel_data[sample_mask, None],
+            time,
+            oversampling_factor=4,
+            TR=TR,
+        )
+
+        # Replace high-motion volumes in interpolated array with the modified data
+        interpolated_arr[~sample_mask, i_voxel] = interpolated_voxel_data[~sample_mask]
 
     return interpolated_arr
 
 
+def get_transform(time, arr, sample_mask, oversampling_factor, max_freq, TR):
+    """Calculate the Lomb-Scargle periodogram for a time series.
+
+    Parameters
+    ----------
+    time : ndarray
+        Time points for which observations are present.
+    arr : ndarray
+        Observations in columns. The number of rows equals the number of time points.
+    sample_mask : ndarray
+        Time points for which to reconstruct the original time series.
+    oversampling_factor : int
+        Oversampling frequency, generally >= 4.
+    max_freq : float
+        Highest frequency allowed. max_freq = 1 means 1*nyquist limit is highest frequency sampled.
+
+    Returns
+    -------
+    reconstructed_arr : ndarray
+        The reconstructed time series.
+    """
+    import numpy as np
+
+    arr = arr[:, None]
+    fs = 1 / TR
+    n_volumes = arr.shape[0]  # Number of time points in censored array
+    n_voxels = arr.shape[1]  # Number of voxels
+    time_span = np.max(time) - np.min(time)  # Total time span
+    n_oversampled_timepoints = int((time_span / TR) * oversampling_factor)
+
+    # calculate sampling frequencies
+    max_freq = max_freq * 0.5 * fs
+    frequencies_hz = np.linspace(
+        1 / (time_span * oversampling_factor),
+        max_freq,
+        n_oversampled_timepoints,
+    )
+
+    # angular frequencies and constant offsets
+    frequencies_angular = 2 * np.pi * frequencies_hz
+    offsets = np.arctan2(
+        np.sum(np.sin(2 * np.dot(frequencies_angular[:, None], time[None, :])), axis=1),
+        np.sum(np.cos(2 * np.dot(frequencies_angular[:, None], time[None, :])), axis=1),
+    ) / (2 * frequencies_angular)
+
+    # spectral power sin and cosine terms
+    spectral_power = (
+        np.dot(frequencies_angular[:, None], time[None, :]) -
+        (frequencies_angular[:, None] * offsets[:, None])
+    )
+    cterm = np.cos(spectral_power)
+    sterm = np.sin(spectral_power)
+
+    D = arr.copy()
+    D = D.reshape((1, n_volumes, n_voxels))
+
+    # This calculation is done separately for the numerator, denominator, and the division
+    cos_mult = cterm[:, :, None] * D
+    numerator = np.sum(cos_mult, axis=1)
+    denominator = np.sum(cterm**2, axis=1)[:, None]
+    power_cos = numerator / denominator
+
+    # Repeat the above for the sine term
+    sin_mult = sterm[:, :, None] * D
+    numerator = np.sum(sin_mult, axis=1)
+    denominator = np.sum(sterm**2, axis=1)[:, None]
+    power_sin = numerator / denominator
+
+    # The inverse function to re-construct the original time series
+    T_rep = np.repeat(sample_mask[None, :], repeats=len(frequencies_hz), axis=0)[:, :, None]
+    prod = T_rep * frequencies_angular[:, None, None]
+    sin_t = np.sin(prod)
+    cos_t = np.cos(prod)
+    sw_p = sin_t * power_sin[:, None, :]
+    cw_p = cos_t * power_cos[:, None, :]
+    S = np.sum(sw_p, axis=0)
+    C = np.sum(cw_p, axis=0)
+    reconstructed_arr = C + S
+    reconstructed_arr = reconstructed_arr.reshape((len(sample_mask), arr.shape[1]))
+
+    # Normalize the reconstructed spectrum, needed when oversampling_factor > 1
+    Std_H = np.std(reconstructed_arr, axis=0)
+    Std_h = np.std(arr, axis=0)
+    norm_fac = Std_H / Std_h
+    reconstructed_arr = reconstructed_arr / norm_fac[None, :]
+
+    return reconstructed_arr
+
+
 def _select_first(lst):
     """Select the first element in a list."""
     return lst[0]

From 8342a86f86452c8ed4de57fe88abe3d114be14ba Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 12:54:51 -0400
Subject: [PATCH 06/14] Fix things.
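Cite the sources for the least squares spectral analysis approach
(power_fd_dvars and mathias2004algorithms) and drop the max_freq parameter:
the highest sampled frequency is now always the Nyquist frequency implied
by the nominal TR. A hedged sketch of the resulting frequency grid, with
illustrative values:

    import numpy as np

    TR = 2.0  # repetition time in seconds (example value)
    time_span = 300.0  # seconds spanned by the retained volumes (example value)
    oversampling_factor = 4

    n_freqs = int((time_span / TR) * oversampling_factor)
    frequencies_hz = np.linspace(
        1 / (time_span * oversampling_factor),  # lowest resolvable frequency, oversampled
        0.5 * (1 / TR),  # Nyquist frequency for the nominal sampling rate
        n_freqs,
    )
    angular_frequencies = 2 * np.pi * frequencies_hz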
---
 xcp_d/data/boilerplate.bib | 11 +++++++++++
 xcp_d/utils/utils.py       | 26 ++++++++++++++++++--------
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/xcp_d/data/boilerplate.bib b/xcp_d/data/boilerplate.bib
index 62b295efd..470f58b82 100644
--- a/xcp_d/data/boilerplate.bib
+++ b/xcp_d/data/boilerplate.bib
@@ -798,3 +798,14 @@ @article{lindquist2019modular
   url={https://doi.org/10.1002/hbm.24528},
   doi={10.1002/hbm.24528}
 }
+
+@article{mathias2004algorithms,
+  title={Algorithms for spectral analysis of irregularly sampled time series},
+  author={Mathias, Adolf and Grond, Florian and Guardans, Ramon and Seese, Detlef and Canela, Miguel and Diebner, Hans H},
+  journal={Journal of Statistical Software},
+  volume={11},
+  pages={1--27},
+  year={2004},
+  url={https://doi.org/10.18637/jss.v011.i02},
+  doi={10.18637/jss.v011.i02}
+}

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index d7497850e..6df63f063 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -530,8 +530,8 @@ def _interpolate(*, arr, sample_mask, TR):
     return interpolated_arr
 
 
-def get_transform(time, arr, sample_mask, oversampling_factor, max_freq, TR):
-    """Calculate the Lomb-Scargle periodogram for a time series.
+def get_transform(time, arr, sample_mask, oversampling_factor, TR):
+    """Interpolate high-motion volumes in a time series using least squares spectral analysis.
 
     Parameters
     ----------
@@ -543,13 +543,24 @@ def get_transform(time, arr, sample_mask, oversampling_factor, TR):
         Time points for which to reconstruct the original time series.
     oversampling_factor : int
         Oversampling frequency, generally >= 4.
-    max_freq : float
-        Highest frequency allowed. max_freq = 1 means 1*nyquist limit is highest frequency sampled.
 
     Returns
     -------
     reconstructed_arr : ndarray
         The reconstructed time series.
+
+    Notes
+    -----
+    This function is translated from Anish Mitra's MATLAB function ``getTransform``,
+    available at https://www.jonathanpower.net/2014-ni-motion-2.html.
+
+    The function implements the least squares spectral analysis method described in
+    :footcite:t:`power_fd_dvars`, which in turn is based on the method described in
+    :footcite:t:`mathias2004algorithms`.
+
+    References
+    ----------
+    .. footbibliography::
     """
     import numpy as np
 
@@ -561,7 +572,7 @@ def get_transform(time, arr, sample_mask, oversampling_factor, TR):
     n_oversampled_timepoints = int((time_span / TR) * oversampling_factor)
 
     # calculate sampling frequencies
-    max_freq = max_freq * 0.5 * fs
+    max_freq = 0.5 * fs
     frequencies_hz = np.linspace(
         1 / (time_span * oversampling_factor),
         max_freq,
@@ -576,9 +587,8 @@ def get_transform(time, arr, sample_mask, oversampling_factor, TR):
     ) / (2 * frequencies_angular)
 
     # spectral power sin and cosine terms
-    spectral_power = (
-        np.dot(frequencies_angular[:, None], time[None, :]) -
-        (frequencies_angular[:, None] * offsets[:, None])
-    )
+    spectral_power = np.dot(frequencies_angular[:, None], time[None, :]) - (
+        frequencies_angular[:, None] * offsets[:, None]
+    )
     cterm = np.cos(spectral_power)
     sterm = np.sin(spectral_power)

From 0dd9f07f90e26a5423d4aca2418be6c1e2afe852 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 13:06:51 -0400
Subject: [PATCH 07/14] Try that.
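Make get_transform keyword-only and pin down the shape conventions in the
docstring: censored_time, shape (C,), carries the retained time points;
arr, shape (C, S), the retained observations; uncensored_time, shape (T,),
the full grid to reconstruct on; the result has shape (T, S). The offsets
in the body are the standard Lomb-Scargle time shifts,
tau(w) = arctan2(sum(sin(2*w*t)), sum(cos(2*w*t))) / (2*w), evaluated over
the retained time points. A hedged standalone sketch of that step (the
function name is illustrative):

    import numpy as np

    def lomb_scargle_tau(angular_frequencies, times):
        # Per-frequency time shift that decouples the sine and cosine fits.
        phase = 2.0 * np.outer(angular_frequencies, times)  # shape (F, C)
        return np.arctan2(
            np.sin(phase).sum(axis=1),
            np.cos(phase).sum(axis=1),
        ) / (2.0 * angular_frequencies)

np.outer(angular_frequencies, times) is the same quantity the function
builds with np.dot(frequencies_angular[:, None], censored_time[None, :]).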
---
 xcp_d/utils/utils.py | 44 +++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index 6df63f063..15b917646 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -505,6 +505,14 @@ def _interpolate(*, arr, sample_mask, TR):
     Notes
     -----
     This function won't work if sample_mask is all zeros, but that should never happen.
+
+    The function uses the least squares spectral analysis method described in
+    :footcite:t:`power_fd_dvars`, which in turn is based on the method described in
+    :footcite:t:`mathias2004algorithms`.
+
+    References
+    ----------
+    .. footbibliography::
     """
     from xcp_d.utils.utils import get_transform
 
@@ -517,37 +525,43 @@ def _interpolate(*, arr, sample_mask, TR):
     interpolated_arr = arr.copy()
     for i_voxel in range(n_voxels):
         voxel_data = arr[:, i_voxel]
         interpolated_voxel_data = get_transform(
-            censored_time,
-            voxel_data[sample_mask, None],
-            time,
+            time=censored_time,
+            arr=voxel_data[sample_mask, None],
+            sample_mask=time,
             oversampling_factor=4,
             TR=TR,
         )
 
         # Replace high-motion volumes in interpolated array with the modified data
-        interpolated_arr[~sample_mask, i_voxel] = interpolated_voxel_data[~sample_mask]
+        interpolated_arr[~sample_mask, i_voxel] = interpolated_voxel_data[~sample_mask, 0]
 
     return interpolated_arr
 
 
-def get_transform(time, arr, sample_mask, oversampling_factor, TR):
+def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, TR):
     """Interpolate high-motion volumes in a time series using least squares spectral analysis.
 
     Parameters
     ----------
-    time : ndarray
+    censored_time : ndarray of shape (C,)
         Time points for which observations are present.
+        C = number of low-motion time points
-    arr : ndarray
+    arr : ndarray of shape (C, S)
         Observations in columns. The number of rows equals the number of time points.
+        C = number of low-motion time points
+        S = number of voxels
-    sample_mask : ndarray
+    uncensored_time : ndarray of shape (T,)
         Time points for which to reconstruct the original time series.
+        T = total number of time points
     oversampling_factor : int
         Oversampling frequency, generally >= 4.
 
     Returns
     -------
-    reconstructed_arr : ndarray
+    reconstructed_arr : ndarray of shape (T, S)
         The reconstructed time series.
+        T = number of time points in uncensored_time
+        S = number of voxels in arr
 
     Notes
     -----
@@ -568,7 +582,7 @@ def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, T
     fs = 1 / TR
     n_volumes = arr.shape[0]  # Number of time points in censored array
     n_voxels = arr.shape[1]  # Number of voxels
-    time_span = np.max(time) - np.min(time)  # Total time span
+    time_span = np.max(censored_time) - np.min(censored_time)  # Total time span
     n_oversampled_timepoints = int((time_span / TR) * oversampling_factor)
 
     # calculate sampling frequencies
@@ -582,12 +596,12 @@ def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, T
     # angular frequencies and constant offsets
     frequencies_angular = 2 * np.pi * frequencies_hz
     offsets = np.arctan2(
-        np.sum(np.sin(2 * np.dot(frequencies_angular[:, None], time[None, :])), axis=1),
-        np.sum(np.cos(2 * np.dot(frequencies_angular[:, None], time[None, :])), axis=1),
+        np.sum(np.sin(2 * np.dot(frequencies_angular[:, None], censored_time[None, :])), axis=1),
+        np.sum(np.cos(2 * np.dot(frequencies_angular[:, None], censored_time[None, :])), axis=1),
     ) / (2 * frequencies_angular)
 
     # spectral power sin and cosine terms
-    spectral_power = np.dot(frequencies_angular[:, None], time[None, :]) - (
+    spectral_power = np.dot(frequencies_angular[:, None], censored_time[None, :]) - (
         frequencies_angular[:, None] * offsets[:, None]
     )
     cterm = np.cos(spectral_power)
@@ -609,7 +623,7 @@ def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, T
     power_sin = numerator / denominator
 
     # The inverse function to re-construct the original time series
-    T_rep = np.repeat(sample_mask[None, :], repeats=len(frequencies_hz), axis=0)[:, :, None]
+    T_rep = np.repeat(uncensored_time[None, :], repeats=len(frequencies_hz), axis=0)[:, :, None]
     prod = T_rep * frequencies_angular[:, None, None]
     sin_t = np.sin(prod)
     cos_t = np.cos(prod)
@@ -618,7 +632,7 @@ def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, T
     S = np.sum(sw_p, axis=0)
     C = np.sum(cw_p, axis=0)
     reconstructed_arr = C + S
-    reconstructed_arr = reconstructed_arr.reshape((len(sample_mask), arr.shape[1]))
+    reconstructed_arr = reconstructed_arr.reshape((len(uncensored_time), arr.shape[1]))
 
     # Normalize the reconstructed spectrum, needed when oversampling_factor > 1
     Std_H = np.std(reconstructed_arr, axis=0)

From b04eb4730ca14842fe77236b35db8d9c82aa447c Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 13:17:35 -0400
Subject: [PATCH 08/14] Update utils.py

---
 xcp_d/utils/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index 15b917646..ed950df09 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -525,9 +525,9 @@ def _interpolate(*, arr, sample_mask, TR):
     for i_voxel in range(n_voxels):
         voxel_data = arr[:, i_voxel]
         interpolated_voxel_data = get_transform(
-            time=censored_time,
+            censored_time=censored_time,
             arr=voxel_data[sample_mask, None],
-            sample_mask=time,
+            uncensored_time=time,
             oversampling_factor=4,
             TR=TR,
         )

From 98b8d50dc6df3ae2f1bf46ecdb86704d76df1195 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 13:29:49 -0400
Subject: [PATCH 09/14] Update utils.py

---
 xcp_d/utils/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index ed950df09..cb6278353 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -531,6 +531,7 @@ def _interpolate(*, arr, sample_mask, TR):
             oversampling_factor=4,
             TR=TR,
         )
+        LOGGER.info(f"Interpolated voxel {i_voxel + 1}: {interpolated_voxel_data.shape}.")
From 6a28afd1efcd2f89b728efa60dd7f8d44f5c09ec Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 14:00:07 -0400
Subject: [PATCH 10/14] Fix.

---
 xcp_d/utils/utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index cb6278353..a0992de74 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -526,7 +526,7 @@ def _interpolate(*, arr, sample_mask, TR):
         voxel_data = arr[:, i_voxel]
         interpolated_voxel_data = get_transform(
             censored_time=censored_time,
-            arr=voxel_data[sample_mask, None],
+            arr=voxel_data[sample_mask],
             uncensored_time=time,
             oversampling_factor=4,
             TR=TR,
         )
@@ -579,6 +579,12 @@ def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, T
     """
     import numpy as np
 
+    assert arr.ndim == 1
+    assert censored_time.ndim == 1
+    assert uncensored_time.ndim == 1
+    assert arr.shape[0] == censored_time.shape[0]
+    assert uncensored_time.shape[0] > censored_time.shape[0]
+
     arr = arr[:, None]
     fs = 1 / TR
     n_volumes = arr.shape[0]  # Number of time points in censored array

From 865128075bae05197d545a33cfd656755a66ed2a Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 15 Apr 2024 15:06:46 -0400
Subject: [PATCH 11/14] Update utils.py

---
 xcp_d/utils/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index a0992de74..294aa1622 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -531,7 +531,6 @@ def _interpolate(*, arr, sample_mask, TR):
             oversampling_factor=4,
             TR=TR,
         )
-        LOGGER.info(f"Interpolated voxel {i_voxel + 1}: {interpolated_voxel_data.shape}.")

From 62621e811634a2482fb32a4f603701fdbec83e55 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Tue, 16 Apr 2024 09:27:35 -0400
Subject: [PATCH 12/14] Try to address DataFrame warnings.

---
 xcp_d/utils/confounds.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/xcp_d/utils/confounds.py b/xcp_d/utils/confounds.py
index bc9223489..d8b8fb020 100644
--- a/xcp_d/utils/confounds.py
+++ b/xcp_d/utils/confounds.py
@@ -76,12 +76,14 @@ def load_motion(
     columns = motion_confounds_df.columns.tolist()
     for col in columns:
         new_col = f"{col}_derivative1"
-        motion_confounds_df[new_col] = motion_confounds_df[col].diff()
+        col_series = motion_confounds_df[col].copy()
+        motion_confounds_df[new_col] = col_series.diff()
 
     columns = motion_confounds_df.columns.tolist()
     for col in columns:
         new_col = f"{col}_power2"
-        motion_confounds_df[new_col] = motion_confounds_df[col] ** 2
+        col_series = motion_confounds_df[col].copy()
+        motion_confounds_df[new_col] = col_series**2
 
     return motion_confounds_df

From a03e64803b20161fe9c69f9880ee845ec81ecb43 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Tue, 16 Apr 2024 10:23:50 -0400
Subject: [PATCH 13/14] Use tqdm.
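Two quality-of-life changes: the voxelwise interpolation loop now reports
progress through tqdm's trange (tqdm is added to the dependencies in
pyproject.toml), and the motion-expansion columns are assigned with .loc
rather than through intermediate Series copies. A hedged sketch of the .loc
pattern on a toy frame (the column name merely echoes one motion
parameter):

    import pandas as pd

    df = pd.DataFrame({"rot_x": [0.0, 0.1, 0.3]})
    df.loc[:, "rot_x_derivative1"] = df.loc[:, "rot_x"].diff()
    df.loc[:, "rot_x_power2"] = df.loc[:, "rot_x"] ** 2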
---
 pyproject.toml           | 1 +
 xcp_d/utils/confounds.py | 6 ++----
 xcp_d/utils/utils.py     | 4 +++-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 94dcfcc35..f1de1d8f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,6 +43,7 @@ dependencies = [
     "sentry-sdk ~= 1.45.0",  # for usage reports
     "templateflow ~= 24.2.0",
     "toml",
+    "tqdm",
 ]
 dynamic = ["version"]

diff --git a/xcp_d/utils/confounds.py b/xcp_d/utils/confounds.py
index d8b8fb020..501c88059 100644
--- a/xcp_d/utils/confounds.py
+++ b/xcp_d/utils/confounds.py
@@ -76,14 +76,12 @@ def load_motion(
     columns = motion_confounds_df.columns.tolist()
     for col in columns:
         new_col = f"{col}_derivative1"
-        col_series = motion_confounds_df[col].copy()
-        motion_confounds_df[new_col] = col_series.diff()
+        motion_confounds_df.loc[:, new_col] = motion_confounds_df.loc[:, col].diff()
 
     columns = motion_confounds_df.columns.tolist()
     for col in columns:
         new_col = f"{col}_power2"
-        col_series = motion_confounds_df[col].copy()
-        motion_confounds_df[new_col] = col_series**2
+        motion_confounds_df.loc[:, new_col] = motion_confounds_df.loc[:, col] ** 2
 
     return motion_confounds_df

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index 294aa1622..b40af66f8 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -514,6 +514,8 @@ def _interpolate(*, arr, sample_mask, TR):
     ----------
     .. footbibliography::
     """
+    from tqdm import trange
+
     from xcp_d.utils.utils import get_transform
 
     n_volumes = arr.shape[0]
@@ -522,7 +524,7 @@ def _interpolate(*, arr, sample_mask, TR):
     n_voxels = arr.shape[1]
 
     interpolated_arr = arr.copy()
-    for i_voxel in range(n_voxels):
+    for i_voxel in trange(n_voxels, desc="Interpolating high-motion volumes"):
         voxel_data = arr[:, i_voxel]
         interpolated_voxel_data = get_transform(
             censored_time=censored_time,

From 45ebff9ee3ff1fe8dfca4b9578d3e633191127a8 Mon Sep 17 00:00:00 2001
From: Taylor Salo
Date: Mon, 7 Oct 2024 12:39:15 -0400
Subject: [PATCH 14/14] Update utils.py

---
 xcp_d/utils/utils.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index b52bcb488..07e1e131c 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -588,6 +588,8 @@ def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, T
     ----------
     .. footbibliography::
     """
+    import warnings
+
     import numpy as np
 
     assert arr.ndim == 1
@@ -655,7 +657,13 @@ def get_transform(*, censored_time, arr, uncensored_time, oversampling_factor, T
     # Normalize the reconstructed spectrum, needed when oversampling_factor > 1
     Std_H = np.std(reconstructed_arr, axis=0)
     Std_h = np.std(arr, axis=0)
-    norm_fac = Std_H / Std_h
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        try:
+            norm_fac = Std_H / Std_h
+        except RuntimeWarning:
+            raise ValueError(arr)
 
     reconstructed_arr = reconstructed_arr / norm_fac[None, :]
 
     return reconstructed_arr
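Taken together, the series replaces cubic-spline interpolation of
high-motion volumes with the least squares spectral analysis of
power_fd_dvars/mathias2004algorithms. A self-contained sketch of the
reconstruction these patches implement, for illustration only (it mirrors
get_transform's math on a toy signal and is not the xcp_d entry point):

    import numpy as np

    def lssa_reconstruct(censored_time, censored_data, full_time, TR, oversampling_factor=4):
        # Fit per-frequency sine/cosine amplitudes to the retained samples.
        span = censored_time.max() - censored_time.min()
        n_freqs = int((span / TR) * oversampling_factor)
        freqs = np.linspace(1 / (span * oversampling_factor), 0.5 / TR, n_freqs)
        omega = 2 * np.pi * freqs

        # Lomb-Scargle time shifts decouple the sine and cosine fits.
        phase2 = 2 * np.outer(omega, censored_time)
        tau = np.arctan2(np.sin(phase2).sum(axis=1), np.cos(phase2).sum(axis=1)) / (2 * omega)

        arg = np.outer(omega, censored_time) - (omega * tau)[:, None]  # (F, C)
        cterm, sterm = np.cos(arg), np.sin(arg)
        a = (cterm @ censored_data) / (cterm**2).sum(axis=1)  # cosine amplitudes
        b = (sterm @ censored_data) / (sterm**2).sum(axis=1)  # sine amplitudes

        # Resynthesize on the full grid (as in the patches, without the tau shift).
        arg_full = np.outer(omega, full_time)  # (F, T)
        recon = a @ np.cos(arg_full) + b @ np.sin(arg_full)

        # Variance-match the reconstruction to the retained data.
        return recon * (censored_data.std() / recon.std())

    # Toy usage: bridge a censored gap in a slow sine wave.
    TR = 1.0
    t = np.arange(120, dtype=float) * TR
    mask = np.ones(t.size, dtype=bool)
    mask[40:50] = False
    data = np.sin(2 * np.pi * 0.05 * t)
    recon = lssa_reconstruct(t[mask], data[mask], t, TR)

In xcp_d the values of the retained volumes are copied back over the
reconstruction afterward, so only the flagged volumes keep interpolated
values.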