From 020818d54ad6090bfbbb0ee202e97f0503ab9a72 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Sun, 5 May 2024 10:19:47 -0400 Subject: [PATCH] Work on separate denoising. --- src/fmripost_aroma/cli/parser.py | 17 +++ src/fmripost_aroma/config.py | 4 + src/fmripost_aroma/interfaces/confounds.py | 118 ++++++++++++++++++++ src/fmripost_aroma/interfaces/reportlets.py | 21 ++-- src/fmripost_aroma/workflows/aroma.py | 115 +++++++++++++++---- 5 files changed, 240 insertions(+), 35 deletions(-) diff --git a/src/fmripost_aroma/cli/parser.py b/src/fmripost_aroma/cli/parser.py index 0a53310..805f965 100644 --- a/src/fmripost_aroma/cli/parser.py +++ b/src/fmripost_aroma/cli/parser.py @@ -353,6 +353,23 @@ def _bids_filter(value, parser): "(e.g., if all the components are classified as signal or noise)" ), ) + g_aroma.add_argument( + "--denoising-method", + action="store", + choices=["aggr", "nonaggr", "both", "none"], + default="nonaggr", + help="Denoising method to apply, if any.", + ) + g_aroma.add_argument( + "--orthogonalize", + action="store_true", + default=False, + help=( + "If True, the AROMA-flagged noise components will be orthogonalized with respect to " + "the non-noise components in the MELODIC mixing matrix. " + "This can serve as an alternative to non-aggressive denoising." + ), + ) g_carbon = parser.add_argument_group("Options for carbon usage tracking") g_carbon.add_argument( diff --git a/src/fmripost_aroma/config.py b/src/fmripost_aroma/config.py index eae2ac3..f207334 100644 --- a/src/fmripost_aroma/config.py +++ b/src/fmripost_aroma/config.py @@ -534,6 +534,10 @@ class workflow(_Config): melodic_dim = None """Number of ICA components to be estimated by MELODIC (positive = exact, negative = maximum).""" + denoising_method = "nonaggr" + """Denoising strategy to be used.""" + orthogonalize = False + """Orthogonalize the AROMA-flagged components w.r.t. 
class _ICADenoiseInputSpec(BaseInterfaceInputSpec):
    method = traits.Enum("aggr", "nonaggr", "orthaggr", mandatory=True, desc="denoising method")
    bold_file = File(exists=True, mandatory=True, desc="input file to denoise")
    mask_file = File(exists=True, mandatory=True, desc="binary brain mask in the BOLD grid")
    confounds = File(
        exists=True,
        mandatory=True,
        desc="TSV of component time series (MELODIC mixing matrix)",
    )
    metrics_file = File(
        exists=True,
        mandatory=True,
        desc="AROMA metrics TSV with 'Component' and 'classification' columns",
    )


class _ICADenoiseOutputSpec(TraitedSpec):
    denoised_file = File(exists=True, desc="denoised output file")


class ICADenoise(SimpleInterface):
    """Denoise a BOLD series using ICA-AROMA component classifications.

    Three strategies are supported:

    - ``aggr``: regress the rejected (noise) component time series out of the
      data directly (aggressive denoising).
    - ``orthaggr``: first orthogonalize the rejected components with respect to
      the accepted ones, then regress the orthogonalized regressors out
      (an alternative to non-aggressive denoising).
    - ``nonaggr``: fit a single GLM containing both accepted and rejected
      components (plus a constant) and subtract only the variance attributed
      to the rejected components.
    """

    input_spec = _ICADenoiseInputSpec
    output_spec = _ICADenoiseOutputSpec

    def _run_interface(self, runtime):
        import os

        import numpy as np
        import pandas as pd
        from nilearn.maskers import NiftiMasker
        from nilearn.masking import apply_mask, unmask

        method = self.inputs.method
        bold_file = self.inputs.bold_file
        mask_file = self.inputs.mask_file
        confounds_df = pd.read_table(self.inputs.confounds)

        # Split component time series into accepted and rejected sets,
        # based on the AROMA classifications.
        metrics_df = pd.read_table(self.inputs.metrics_file)
        rejected_columns = metrics_df.loc[
            metrics_df["classification"] == "rejected", "Component"
        ]
        accepted_columns = metrics_df.loc[
            metrics_df["classification"] == "accepted", "Component"
        ]
        rejected_components = confounds_df[rejected_columns].to_numpy()
        accepted_components = confounds_df[accepted_columns].to_numpy()

        # Write into the node's working directory with a method-specific name,
        # rather than a hard-coded subject/task filename; the DataSink applies
        # BIDS naming downstream.
        out_file = os.path.join(runtime.cwd, f"desc-{method}Denoised_bold.nii.gz")

        if method in ("aggr", "orthaggr"):
            if method == "orthaggr":
                # Regress the accepted components out of the rejected time
                # series to get "pure noise" regressors, so that aggressive
                # regression does not remove shared signal variance.
                betas = np.linalg.lstsq(
                    accepted_components, rejected_components, rcond=None
                )[0]
                pred_noise = np.dot(accepted_components, betas)
                rejected_components = rejected_components - pred_noise

            masker = NiftiMasker(
                mask_img=mask_file,
                standardize_confounds=True,
                standardize=False,
                smoothing_fwhm=None,
                detrend=False,
                low_pass=None,
                high_pass=None,
                t_r=None,  # not needed: no bandpass filtering is applied
                reports=False,
            )
            # NiftiMasker regresses the confounds out during transformation.
            denoised_2d = masker.fit_transform(bold_file, confounds=rejected_components)
            masker.inverse_transform(denoised_2d).to_filename(out_file)
        else:
            # Non-aggressive denoising: fit a GLM with accepted AND rejected
            # components (plus a constant term), then remove only the variance
            # explained by the rejected components.
            data = apply_mask(bold_file, mask_file)  # time x voxels
            regressors = np.hstack(
                (
                    rejected_components,
                    accepted_components,
                    np.ones((data.shape[0], 1)),
                ),
            )
            # Drop the constant's beta; it is modeled but never removed.
            betas = np.linalg.lstsq(regressors, data, rcond=None)[0][:-1]

            noise_idx = np.arange(rejected_components.shape[1])
            pred_noise = np.dot(rejected_components, betas[noise_idx, :])
            unmask(data - pred_noise, mask_file).to_filename(out_file)

        # Expose the output so downstream nodes can consume it; the original
        # implementation never populated _results, so the declared
        # 'denoised_file' output was always undefined.
        self._results["denoised_file"] = out_file
        return runtime
self.aggregate_outputs(runtime=runtime) - self._noise_components_file = os.path.join(outputs.out_dir, "classified_motion_ICs.txt") - NIWORKFLOWS_LOG.info("Generating report for ICA AROMA") return super()._post_run_hook(runtime) diff --git a/src/fmripost_aroma/workflows/aroma.py b/src/fmripost_aroma/workflows/aroma.py index bb81f5f..ae46d21 100644 --- a/src/fmripost_aroma/workflows/aroma.py +++ b/src/fmripost_aroma/workflows/aroma.py @@ -35,8 +35,6 @@ def init_ica_aroma_wf( *, bold_file: str, metadata: dict, - aroma_melodic_dim: int = -200, - err_on_aroma_warn: bool = False, susan_fwhm: float = 6.0, ): """Build a workflow that runs `ICA-AROMA`_. @@ -86,13 +84,6 @@ def init_ica_aroma_wf( susan_fwhm : :obj:`float` Kernel width (FWHM in mm) for the smoothing step with FSL ``susan`` (default: 6.0mm) - err_on_aroma_warn : :obj:`bool` - Do not fail on ICA-AROMA errors - aroma_melodic_dim : :obj:`int` - Set the dimensionality of the MELODIC ICA decomposition. - Negative numbers set a maximum on automatic dimensionality estimation. - Positive numbers set an exact number of components to extract. - (default: -200, i.e., estimate <=200 components) Inputs ------ @@ -222,7 +213,7 @@ def init_ica_aroma_wf( tr_sec=float(metadata["RepetitionTime"]), mm_thresh=0.5, out_stats=True, - dim=aroma_melodic_dim, + dim=config.workflow.melodic_dim, ), name="melodic", ) @@ -242,20 +233,14 @@ def init_ica_aroma_wf( # Generate the ICA-AROMA report # What steps does this entail? 
def init_denoise_wf(bold_file):
    """Build a workflow that denoises a BOLD series using AROMA confounds.

    This workflow performs the denoising in the requested output space(s).

    Parameters
    ----------
    bold_file : str
        Path to the BOLD series, used only to derive the workflow name.

    Returns
    -------
    workflow : LiterateWorkflow
        One denoising branch is built per requested method.
    """
    from niworkflows.engine.workflows import LiterateWorkflow as Workflow

    # NOTE: the config attribute is ``denoising_method`` (see config.py) and
    # the CLI choices are "aggr", "nonaggr", "both", "none" (see parser.py).
    # The previous implementation read a non-existent ``denoise_method``
    # attribute and compared against an impossible "all" value.
    if config.workflow.denoising_method == "both":
        denoise_methods = ["aggr", "nonaggr"]
    elif config.workflow.denoising_method == "none":
        denoise_methods = []
    else:
        denoise_methods = [config.workflow.denoising_method]

    workflow = Workflow(name=_get_wf_name(bold_file, "denoise"))

    inputnode = pe.Node(
        niu.IdentityInterface(
            fields=[
                "bold_file",
                "bold_mask_std",
                "confounds",
                "metrics",
                "name_source",
                "skip_vols",
                "spatial_reference",
            ],
        ),
        name="inputnode",
    )

    # Drop non-steady-state volumes before denoising so the GLM is not
    # biased by the initial T1-saturation spikes.
    rm_non_steady_state = pe.Node(
        niu.Function(function=_remove_volumes, output_names=["bold_cut"]),
        name="rm_nonsteady",
    )
    workflow.connect([
        (inputnode, rm_non_steady_state, [
            ("skip_vols", "skip_vols"),
            ("bold_file", "bold_file"),
        ]),
    ])  # fmt:skip

    for denoise_method in denoise_methods:
        denoise = pe.Node(
            ICADenoise(method=denoise_method),
            name=f"denoise_{denoise_method}",
        )
        # Connection targets must match the ICADenoise input spec:
        # it declares ``confounds`` and ``metrics_file``; it has no
        # ``confounds_file`` or ``skip_vols`` inputs.
        workflow.connect([
            (inputnode, denoise, [
                ("confounds", "confounds"),
                ("metrics", "metrics_file"),
                ("bold_mask_std", "mask_file"),
            ]),
            (rm_non_steady_state, denoise, [("bold_cut", "bold_file")]),
        ])  # fmt:skip

        # Re-insert the dropped volumes so outputs match the input length.
        # Node names must be unique per method; the previous implementation
        # reused "add_non_steady_state" on every loop iteration, which nipype
        # rejects as a duplicate node name.
        add_non_steady_state = pe.Node(
            niu.Function(function=_add_volumes, output_names=["bold_add"]),
            name=f"add_non_steady_state_{denoise_method}",
        )
        workflow.connect([
            (inputnode, add_non_steady_state, [
                ("bold_file", "bold_file"),
                ("skip_vols", "skip_vols"),
            ]),
            (denoise, add_non_steady_state, [("denoised_file", "bold_cut_file")]),
        ])  # fmt:skip

        ds_denoised = pe.Node(
            DerivativesDataSink(
                desc=f"{denoise_method}Denoised",
                datatype="func",
                dismiss_entities=("echo",),
            ),
            name=f"ds_denoised_{denoise_method}",
            run_without_submitting=True,
            mem_gb=config.DEFAULT_MEMORY_MIN_GB,
        )
        # DerivativesDataSink consumes ``in_file`` (not "denoised_file") and
        # needs a ``source_file`` to derive the BIDS entities from.
        workflow.connect([
            (inputnode, ds_denoised, [("name_source", "source_file")]),
            (add_non_steady_state, ds_denoised, [("bold_add", "in_file")]),
        ])  # fmt:skip

    return workflow


def _getbtthresh(medianval):
    return 0.75 * medianval