diff --git a/docs/notebooks/data/reports-spec.yml b/docs/notebooks/data/reports-spec.yml new file mode 100644 index 00000000..aaa8dbce --- /dev/null +++ b/docs/notebooks/data/reports-spec.yml @@ -0,0 +1,38 @@ +package: funconn +sections: +- name: Group report + reportlets: + - bids: + desc: censoring + extension: [.html] + suffix: bold + caption: | + fMRI duration after censoring computed as the number of good timepoints multiplied by the repetition time (TR). + The red line corresponds to the QC cutoff of 5 minutes (300 seconds). Less than 5 minutes of fMRI is not enough to + reliably estimate connectivity. + subtitle: Report about censoring. + - bids: {desc: fcdist, suffix: bold} + caption: Density distributions of within-session FC strengths. The FC distribution from each session are overlaid. + subtitle: Functional connectivity density distributions. + style: + max-width: 600px + + - bids: {desc: qcfc, suffix: bold} + caption: | + QC-FC plots tested functional connectivity associations with three image quality metrics (fd_mean, fd_num, and + fd_perc). Plots were generated from functional data from all sessions. Red dotted lines represent a theoretical + artifact-free null-hypothesis distribution obtained through permutation analyses. QC-FC percent match level represents the distance + between the observed and the null-hypothesis distribution and is computed using the R implementation of the two-sample Kolmogorov-Smirnov + test for goodness of fit. Red label indicates that the QC-FC distribution did not reach above the 95% cutoff. + subtitle: QC-FC correlation distributions. + style: + max-width: 1350px + + - bids: {desc: qcfcvseuclidean, suffix: bold} + caption: | + Correlation between QC-FC and Euclidean distance separating nodes. The euclidean distance is computed from the centers of mass + of each region in the atlas used to compute the FC. + subtitle: Distance-dependent effect of motion. + style: + max-width: 1350px + diff --git a/docs/notebooks/group_report_cli.py b/docs/notebooks/group_report_cli.py new file mode 100644 index 00000000..c764ef88 --- /dev/null +++ b/docs/notebooks/group_report_cli.py @@ -0,0 +1,174 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2023 The Axon Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# + +import argparse +import logging + +import numpy as np +import os.path as op +import pandas as pd + +from itertools import chain + +from load_save import ( + get_atlas_data, + find_atlas_dimension, + find_derivative, + check_existing_output, + get_bids_savename, + get_func_filenames_bids, + load_iqms, + FC_FILLS, + FC_PATTERN +) + +from reports import ( + group_report, +) + + +def get_arguments() -> argparse.Namespace: + parser: argparse.ArgumentParser = argparse.ArgumentParser( + description="""Compute functional connectivity group report from functional connectivity matrices.""", + ) + + # Input/Output arguments and options + parser.add_argument( + "output", + help="path to the directory where the functional connectivity matrices are stored", + ) + parser.add_argument( + "--mriqc-path", + default=None, + help="specify the path to the mriqc derivatives", + ) + parser.add_argument( + "--task", + default=["rest"], + action="store", + nargs="+", + help="a space delimited list of task(s)", + ) + parser.add_argument( + "--fc-estimator", + default="sparse inverse covariance", + action="store", + choices=["correlation", "covariance", "sparse", "sparse inverse covariance"], + type=str, + help="""type of connectivity to compute (can be 'correlation', 'covariance' or + 'sparse')""", + ) + parser.add_argument( + "-v", + "--verbosity", + action="count", + default=1, + help="""increase output verbosity (-v: standard logging infos; -vv: logging + infos and NiLearn verbose; -vvv: debug)""", + ) + + args = parser.parse_args() + + return args + + +def main(): + args = get_arguments() + output = args.output + task_filter = args.task + mriqc_path = args.mriqc_path + fc_label = args.fc_estimator.replace(" ", "") + + verbosity_level = args.verbosity + + logging_level_map = { + 0: logging.WARN, + 1: logging.INFO, + 2: logging.INFO, + 3: logging.DEBUG, + } + + logging.basicConfig( + # filename='example.log', + # format='%(asctime)s %(levelname)s:%(message)s', + format="%(levelname)s: %(message)s", + level=logging_level_map[min([verbosity_level, 3])], + ) + + logging.captureWarnings(True) + + # Find the atlas dimension from the output path + atlas_dimension = find_atlas_dimension(output) + atlas_data = get_atlas_data(dimension=atlas_dimension) + atlas_filename = getattr(atlas_data, "maps") + + # Find all existing functional connectivity + input_path = find_derivative(output) + func_filenames, _ = get_func_filenames_bids(input_path, task_filter=task_filter) + all_filenames = list(chain.from_iterable(func_filenames)) + + existing_fc = check_existing_output( + output, + all_filenames, + return_existing=True, + return_output=True, + patterns=FC_PATTERN, + meas=fc_label, + **FC_FILLS, + ) + if not existing_fc: + filename = op.join( + output, + get_bids_savename( + all_filenames[0], patterns=FC_PATTERN, meas=fc_label, **FC_FILLS + ), + ) + raise ValueError( + f"No functional connectivity of type {filename} were found. Please revise the arguments." + ) + + # Load functional connectivity matrices + fc_matrices = [] + for file_path in existing_fc: + fc_matrices.append(np.loadtxt(file_path, delimiter="\t")) + + # Load fMRI duration after censoring + good_timepoints_df = pd.read_csv( + op.join(output, "fMRI_duration_after_censoring.csv") + ) + + # Load IQMs + iqms_df = load_iqms(output, existing_fc, mriqc_path=mriqc_path) + + # Generate group figures + group_report( + good_timepoints_df, + fc_matrices, + iqms_df, + atlas_filename, + output, + ) + + +if __name__ == "__main__": + main() diff --git a/docs/notebooks/load_save.py b/docs/notebooks/load_save.py new file mode 100644 index 00000000..23a2c5a0 --- /dev/null +++ b/docs/notebooks/load_save.py @@ -0,0 +1,449 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2023 The Axon Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# +"""Python module for loading and saving fMRI related data""" + +import os +import re +import os.path as op +import pandas as pd +from collections import defaultdict +import logging +from typing import Optional, Union + +from bids import BIDSLayout +import numpy as np + +from pandas import read_csv +from nibabel import loadsave +from bids.layout import parse_file_entities +from bids.layout.writing import build_path +from nilearn.datasets import fetch_atlas_difumo +from nilearn.interfaces.fmriprep.load_confounds import _load_single_confounds_file + +FC_PATTERN: list = [ + "sub-{subject}[/ses-{session}]/func/sub-{subject}" + "[_ses-{session}][_task-{task}][_meas-{meas}]" + "_{suffix}{extension}" +] +FC_FILLS: dict = {"suffix": "connectivity", "extension": ".tsv"} + +TIMESERIES_PATTERN: list = [ + "sub-{subject}[/ses-{session}]/func/sub-{subject}" + "[_ses-{session}][_task-{task}][_desc-{desc}]" + "_{suffix}{extension}" +] +TIMESERIES_FILLS: dict = {"desc": "denoised", "extension": ".tsv"} + +CONFOUND_PATTERN: list = [ + "sub-{subject}[_ses-{session}][_task-{task}][_part-{part}][_desc-{desc}]" + "_{suffix}{extension}" +] +CONFOUND_FILLS: dict = {"desc": "confounds", "suffix": "timeseries", "extension": "tsv"} + + +def separate_by_similar_values( + input_list: list, external_value: Optional[Union[list, np.ndarray]] = None +) -> dict: + """This returns elements of `input_list` with similar values (optionally set by + `external_value`) separated into sub-lists. + + Parameters + ---------- + input_list : list + List to be separated. + external_value : Optional[list], optional + Values corresponding to the elements of `input_list`, by default None + + Returns + ------- + dict + Dictionary where each entry is a list of elements that have similar values and + the keys are the value for each list. + """ + if external_value is None: + external_value = input_list + + data_by_value = defaultdict(list) + + for val, data in zip(external_value, input_list): + data_by_value[val].append(data) + return data_by_value + + +def get_func_filenames_bids( + paths_to_func_dir: str, + task_filter: Optional[list] = None, + ses_filter: Optional[list] = None, + run_filter: Optional[list] = None, +) -> tuple[list[list[str]], list[float]]: + """Return the BIDS functional imaging files matching the specified task and session + filters as well as the first (if multiple) unique repetition time (TR). + + Parameters + ---------- + paths_to_func_dir : str + Path to the BIDS (usually derivatives) directory + task_filter : list, optional + List of task name(s) to consider, by default `None` + ses_filter : list, optional + List of session name(s) to consider, by default `None` + run_filter : list, optional + List of run(s) to consider, by default `None` + + Returns + ------- + tuple[list[list[str]], list[float]] + Returns two lists with: a list of sorted filenames and a list of TRs. + """ + logging.debug("Using BIDS to find functional files...") + + layout = BIDSLayout( + paths_to_func_dir, + validate=False, + ) + + all_derivatives = layout.get( + scope="all", + return_type="file", + extension=["nii.gz", "gz"], + suffix="bold", + task=task_filter or [], + session=ses_filter or [], + run=run_filter or [], + ) + + if not all_derivatives: + raise ValueError( + f"No functional derivatives were found under {paths_to_func_dir} with the following filters:" + f"\nExtension: ['nii.gz', 'gz']" + f"\nSuffix: bold" + f"\nTask: {task_filter or []}" + f"\nSession: {ses_filter or []}" + f"\nRun: {run_filter or []}" + ) + + affines = [] + for file in all_derivatives: + affines.append(loadsave.load(file).affine) + + similar_fov_dict = separate_by_similar_values( + all_derivatives, np.array(affines)[:, 0, 0] + ) + if len(similar_fov_dict) > 1: + logging.warning( + f"{len(similar_fov_dict)} different FoV found ! " + "Files with similar FoV will be computed together. " + "Computation time may increase." + ) + + separated_files = [] + separated_trs = [] + for file_group in similar_fov_dict.values(): + t_rs = [] + for file in file_group: + t_rs.append(layout.get_metadata(file)["RepetitionTime"]) + + similar_tr_dict = separate_by_similar_values(file_group, t_rs) + separated_files += list(similar_tr_dict.values()) + separated_trs += list(similar_tr_dict.keys()) + + if len(similar_tr_dict) > 1: + logging.warning( + "Multiple TR values found ! " + "Files with similar TR will be computed together. " + "Computation time may increase." + ) + + return separated_files, separated_trs + + +def get_bids_savename(filename: str, patterns: list, **kwargs) -> str: + """Return the BIDS filename following the specified patterns and modifying the + entities from the keywords arguments. + + Parameters + ---------- + filename : str + Name of the original BIDS file + patterns : list, optional + Patterns for the output file, by default FC_PATTERN + + Returns + ------- + str + BIDS output filename. + """ + entity = parse_file_entities(filename) + + for key, value in kwargs.items(): + entity[key] = value + + bids_savename = build_path(entity, patterns) + + return str(bids_savename) + + +def get_atlas_data(atlas_name: str = "DiFuMo", **kwargs) -> dict: + """Fetch the specifies atlas filename and data. + + Parameters + ---------- + atlas_name : str, optional + Name of the atlas to fetch, by default "DiFuMo" + + Returns + ------- + dict + Dictionary with keys "maps" (filename) and "labels" (ROI labels). + """ + logging.info("Fetching the DiFuMo atlas ...") + + if kwargs["dimension"] not in [64, 128, 512]: + logging.warning( + "Dimension for DiFuMo atlas is different from 64, 128 or 512 ! Are you" + "certain you want to deviate from those optimized modes? " + ) + + return fetch_atlas_difumo(legacy_format=False, **kwargs) + + +def find_atlas_dimension(path: str, atlas_name: str = "DiFuMo") -> int: + """Fetch the atlas dimension from the path where the functional connectivity are saved. + Parameters + ---------- + path : str + Path to the directory where functional connectivity are saved. + atlas_name : str, optional + Name of the atlas to fetch, by default "DiFuMo" + + Returns + ------- + int + Atlas dimension. + """ + + # Using regular expression to extract the number of dimensions + dimension_match = re.search(rf"{atlas_name}(\d+)", path) + + if dimension_match: + return int(dimension_match.group(1)) + else: + raise ValueError( + f"The output path {path} does not contain the expected pattern: {atlas_name} followed by digits." + ) + + +def find_derivative(path: str, derivatives_name: str = "derivatives") -> str: + """Find the corresponding BIDS derivative folder (if existing, otherwise it will be + created). + + Parameters + ---------- + path : str + Path to the BIDS (usually derivatives) dataset. + derivatives_name : str, optional + Name of the derivatives folder, by default "derivatives" + + Returns + ------- + str + Absolute path to the derivative folder. + """ + splitted_path = path.split("/") + try: + while derivatives_name not in splitted_path[-1]: + splitted_path.pop() + except IndexError: + logging.warning( + f'"{derivatives_name}" could not be found on path - ' + f'creating at: {op.join(path, derivatives_name)}"' + ) + return op.join(path, derivatives_name) + + return "/".join(splitted_path) + + +def find_mriqc(path: str) -> str: + """Find the path to the MRIQC folder (if existing, otherwise it will be + created). + + Parameters + ---------- + path : str + Path to the BIDS (usually derivatives) dataset. + + Returns + ------- + str + Absolute path to the mriqc folder. + """ + logging.debug("Searching for MRIQC path...") + derivative_path = find_derivative(path) + + folders = [ + f for f in os.listdir(derivative_path) if op.isdir(op.join(derivative_path, f)) + ] + + mriqc_path = [f for f in folders if "mriqc" in f] + if len(mriqc_path) >= 2: + logging.warning( + f"More than one mriqc derivative folder was found: {mriqc_path}" + f"The first instance {mriqc_path[0]} is used for the computation." + "In case you want to use another mriqc derivative folder, use the --mriqc-path flag" + ) + return op.join(derivative_path, mriqc_path[0]) + + +def reorder_iqms(iqms_df: pd.DataFrame, fc_paths: list[str]): + """Reorder the IQMs according to the list of filenames + + Parameters + ---------- + iqms_df : pd.Dataframe + Dataframe containing the IQMs value for each image + fc_paths : list [str] + List of paths to the functional connectivity matrices + + Returns + ------- + panda.df + Dataframe containing the IQMs dataframe with reordered rows. + """ + iqms_df[["subject", "session", "task"]] = iqms_df["bids_name"].str.extract( + r"sub-(\d+)_ses-(\d+)_task-(\w+)_" + ) + entities_list = [parse_file_entities(filepath) for filepath in fc_paths] + entities_df = pd.DataFrame(entities_list) + + return pd.merge( + entities_df, iqms_df, on=["subject", "session", "task"], how="inner" + ) + + +def load_iqms( + derivative_path: str, + fc_paths: list[str], + mriqc_path: str = None, + mod="bold", + iqms_name: list = ["fd_mean", "fd_num", "fd_perc"], +) -> str: + """Load the IQMs and match their order with the corresponding functional matrix. + + Parameters + ---------- + derivative_path : str + Path to the BIDS dataset's derivatives. + fc_paths : list [str] + List of paths to the functional connectivity matrices + mriqc_path : str, optional + Name of the MRIQC derivative folder, by default None + mod : str, optional + Load the IQMs of that modality + iqms_name : list, optional + Name of the IQMs to find, by default ["fd_mean", "fd_num", "fd_perc"] + + Returns + ------- + panda.df + Dataframe containing the IQMs loaded from the derivatives folder. + """ + # Find the MRIQC folder + if mriqc_path is None: + mriqc_path = find_mriqc(derivative_path) + + # Load the IQMs from the group tsv + iqms_filename = op.join(mriqc_path, f"group_{mod}.tsv") + iqms_df = read_csv(iqms_filename, sep="\t") + # If multi-echo dataset and the IQMs of interest are motion-related, keep only the IQMs from the second echo + if "echo" in iqms_df["bids_name"][0] and all("fd" in i for i in iqms_name): + iqms_df = iqms_df[iqms_df["bids_name"].str.contains("echo-2")] + logging.info( + "In the case of a multi-echo dataset, the IQMs of the second echo are considered." + ) + + # Match the order of the rows in iqms_df with the corresponding FC + iqms_df = reorder_iqms(iqms_df, fc_paths) + + # Keep only the IQMs of interest + iqms_df = iqms_df[iqms_name] + + return iqms_df + + +def check_existing_output( + output: str, + func_filename: list[str], + return_existing: bool = False, + return_output: bool = False, + **kwargs, +) -> tuple[list[str], list[str]]: + """Check for existing output. + + Parameters + ---------- + output : str + Path to the output directory + func_filename : list[str] + Input files to be processed + return_existing : bool, optional + Condition to return the list of input corresponding to existing outputs, by default + False + return_output: bool, optional + Condition to return the path of existing outputs, by default False + + Returns + ------- + tuple[list[str], list[str]] + List of missing data path (optionally, a second list of existing data path) + """ + if return_output == True and return_existing == False: + raise ValueError( + "Setting return_output=True in check_existing_output requires return_existing=True." + ) + + missing_data_filter = [ + not op.exists(op.join(output, get_bids_savename(filename, **kwargs))) + for filename in func_filename + ] + + missing_data = np.array(func_filename)[missing_data_filter] + logging.debug( + f"\t{sum(missing_data_filter)} missing data found for files:" + "\n\t" + "\n\t".join(missing_data) + ) + + if return_existing: + if return_output: + existing_output = [ + op.join(output, get_bids_savename(filename, **kwargs)) + for filename in func_filename + if op.exists(op.join(output, get_bids_savename(filename, **kwargs))) + ] + return existing_output + else: + existing_data = np.array(func_filename)[ + [not fltr for fltr in missing_data_filter] + ] + return missing_data.tolist(), existing_data.tolist() + + return missing_data.tolist() diff --git a/docs/notebooks/reports.py b/docs/notebooks/reports.py new file mode 100644 index 00000000..57fbf1f5 --- /dev/null +++ b/docs/notebooks/reports.py @@ -0,0 +1,453 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +# +# Copyright 2023 The Axon Lab +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# We support and encourage derived works from this project, please read +# about our expectations at +# +# https://www.nipreps.org/community/licensing/ +# +"""Python module for functional connectivity visual reports""" + +import logging +import os.path as op + +import matplotlib.pyplot as plt +import nibabel as nib +import numpy as np +import pandas as pd +import plotly.offline as pyo +import seaborn as sns +from nireports.assembler.report import Report +from scipy.stats import pearsonr, ks_2samp +from time import strftime +from uuid import uuid4 + + +FIGURE_PATTERN: list = [ + "sub-{subject}/figures/sub-{subject}[_ses-{session}]" + "[_task-{task}][_meas-{meas}][_desc-{desc}]" + "_{suffix}{extension}", + "sub-{subject}/figures/sub-{subject}[_ses-{session}]" + "[_task-{task}][_desc-{desc}]_{suffix}{extension}", +] +FIGURE_FILLS: dict = {"extension": "png"} + +TS_FIGURE_SIZE: tuple = (50, 25) +FC_FIGURE_SIZE: tuple = (70, 45) +LABELSIZE: int = 42 +N_PERMUTATION: int = 10000 +ALPHA = 0.05 +PERCENT_MATCH_CUT_OFF = 95 +DURATION_CUT_OFF = 300 + +def group_report_censoring(good_timepoints_df, output) -> None: + """ + Generate a group report about censoring. + + This function generates an HTML report that includes an interactive scatterplot + showing the fMRI duration after censoring. The scatterplot includes + error bars for the confidence interval and a red line indicating a duration cutoff. + + Parameters: + ----------- + good_timepoints_df: pd.Dataframe + A DataFrame containing information the fMRI duration after censoring. + output : str + Path to the output directory + """ + filenames = good_timepoints_df["filename"] + durations = good_timepoints_df["duration"] + + # Constructing the data for the plot + # Add jitter to x values + jitter = 0.2 # adjust this value to change the amount of jitter + x_values = [1 + np.random.uniform(-jitter, jitter) for _ in range(len(durations))] + data = [ + { + "x": x_values, + "y": durations, + "text": filenames, + "mode": "markers", + "type": "scatter", + "hoverinfo": "text", + "marker": {"opacity": 0.5}, + } + ] + + # Adding a red line at 5 minutes + red_line = { + "type": "line", + "x0": 0, + "y0": DURATION_CUT_OFF, + "x1": 1.5, + "y1": DURATION_CUT_OFF, + "line": {"color": "red", "width": 3, "dash": "dashdot"}, + } + + # Layout settings + layout = { + "hovermode": "closest", + "title": "Duration of fMRI signal after censoring", + "yaxis": {"title": "Duration [s]"}, + "xaxis": {"showticklabels": False, "range": [0.5, 1.5]}, + "shapes": [red_line], + "width": 600, + "height": 600, + "font": {"size": 16}, + "annotations": [ + { + "x": 0.8, + "y": DURATION_CUT_OFF - DURATION_CUT_OFF / 55, + "xref": "x", + "yref": "y", + "text": f"QC cutoff of {DURATION_CUT_OFF/60} min", + "showarrow": False, + "font": {"color": "red"}, + } + ], + } + + fig = {"data": data, "layout": layout} + + # Save the plot as an HTML file + pyo.plot( + fig, + filename=op.join(output, "reportlets", "group_desc-censoring_bold.html"), + auto_open=False, + ) + + +def group_report_fc_dist( + fc_matrices: list[np.ndarray], + output: str, +) -> None: + """Plot and save the functional connectivity density distributions. + + Parameters + ---------- + fc_matrices : list[np.ndarray] + List of functional connectivity matrices + output : str + Path to the output directory + """ + + _, ax = plt.subplots(figsize=FC_FIGURE_SIZE) + + for fc_matrix in fc_matrices: + sns.displot( + fc_matrix, + kind="kde", + fill=True, + linewidth=0.5, + legend=False, + palette="ch:s=.25,rot=-.25", + ) + + ax.tick_params(labelsize=LABELSIZE) + + # Ensure the labels are within the figure + plt.tight_layout() + + savename = op.join("reportlets", "group_desc-fcdist_bold.svg") + + logging.debug("Saving functional connectivity distribution visual report at:") + logging.debug(f"\t{op.join(output, savename)}") + + plt.savefig(op.join(output, savename)) + plt.close() + + +def group_reportlet_fc_dist( + fc_matrices: list[np.ndarray], + output: str, +) -> None: + """Plot and save the functional connectivity density distributions. + + Parameters + ---------- + fc_matrices : list[np.ndarray] + List of functional connectivity matrices + output : str + Path to the output directory + """ + + _, ax = plt.subplots(figsize=FC_FIGURE_SIZE) + + for fc_matrix in fc_matrices: + sns.displot( + fc_matrix, + kind="kde", + fill=True, + linewidth=0.5, + legend=False, + palette="ch:s=.25,rot=-.25", + ) + + ax.tick_params(labelsize=LABELSIZE) + + # Ensure the labels are within the figure + plt.tight_layout() + + savename = op.join("reportlets", "group_desc-fcdist_bold.svg") + + logging.debug("Saving functional connectivity distribution visual report at:") + logging.debug(f"\t{op.join(output, savename)}") + + plt.savefig(op.join(output, savename)) + plt.close() + + +def group_reportlet_qc_fc( + fc_matrices: list[np.ndarray], + iqms_df: pd.DataFrame, + output: str, +) -> dict: + """Plot and save the QC-FC distributions. + + Parameters + ---------- + fc_matrices : list[np.ndarray] + List of functional connectivity matrices + iqms_df : pd.Dataframe + Dataframe containing the image quality metrics to correlate with + output : str + Path to the output directory + """ + + # Stack the list of arrays into a 3D matrix + fc_matrices = np.stack(fc_matrices, axis=2) + + # Keep only upper triangle as the matrix is symmetric + upper_triangle_indices = np.triu_indices(fc_matrices.shape[0], k=1) + fc_matrices = fc_matrices[upper_triangle_indices] + + if fc_matrices.shape[1] != iqms_df.shape[0]: + raise ValueError( + "The number of functional connectivity matrices and IQMs do not match." + ) + + if fc_matrices.shape[1] == 1: + raise ValueError( + "We need at least two functional connectivity matrices to be able to compute its correlation with IQMs." + ) + + fig, axs = plt.subplots(1, 3, figsize=FC_FIGURE_SIZE) + + # Iterate over each IQM + qc_fc_dict = dict() + for i, iqm_column in enumerate(iqms_df.columns): + # Create an empty list to store correlations for each edge + qc_fcs = [] + + # Iterate over each edge + logging.debug("Compute QC-FC correlation for each edge.") + for e in range(fc_matrices.shape[0]): + qc_fc = np.corrcoef(fc_matrices[e, :], iqms_df[iqm_column])[0, 1] + qc_fcs.append(qc_fc) + + # Create a density distribution plot for the current IQM + logging.debug("Create the density distribution plot.") + sns.kdeplot( + qc_fcs, fill=True, label="QC-FC distribution", linewidth=3, ax=axs[i] + ) + + # Save the QC-FC distributions in a dictionary + qc_fc_dict[iqm_column] = qc_fcs + + ## Permutation analyses + logging.debug("Compute QC-FC distribution under the null hypothesis.") + correlations_null = [] + for e in range(fc_matrices.shape[0]): + for _ in range(N_PERMUTATION): + permuted_fc = fc_matrices[ + e, np.random.default_rng(seed=42).permutation(fc_matrices.shape[1]) + ] + # Correlation under null hypothesis + correlation = np.corrcoef(permuted_fc, iqms_df[iqm_column])[0, 1] + correlations_null.append(correlation) + + # Create a density distribution plot for null distribution + logging.debug("Create the density distribution plot for the null distribution.") + sns.kdeplot( + correlations_null, + fill=False, + color="red", + label="Dist under null hypothesis", + linewidth=3, + linestyle="dashed", + ax=axs[i], + ) + plt.legend(fontsize=LABELSIZE + 2) + + # Compute percent match between the two distributions + logging.debug("Compute percent match between the two distributions.") + ks_statistic, _ = ks_2samp(qc_fcs, correlations_null) + percent_match_ks = (1 - ks_statistic) * 100 + + # Plot the box in red if the correlation is significant + facecolor = "red" if percent_match_ks < PERCENT_MATCH_CUT_OFF else "grey" + axs[i].text( + 0.08, + 0.9, + f"QC-FC% = {percent_match_ks:.0f}", + fontsize=LABELSIZE - 2, + bbox=dict(facecolor=facecolor, alpha=0.4, boxstyle="round,pad=0.5"), + transform=axs[i].transAxes, + ) + axs[i].tick_params(labelsize=LABELSIZE) + axs[i].set_title(iqm_column, fontsize=LABELSIZE + 2) + + # Turn off individual plot y-labels + for ax in axs: + ax.set_ylabel("") + + fig.supylabel("Density", fontsize=LABELSIZE + 2) + fig.suptitle("QC-FC correlation distributions", fontsize=LABELSIZE + 4) + + savename = op.join("reportlets", "group_desc-qcfc_bold.svg") + + logging.debug("Saving QC-FC visual report at:") + logging.debug(f"\t{op.join(output, savename)}") + + plt.savefig(op.join(output, savename)) + plt.close() + + return qc_fc_dict + + +def compute_distance(atlas_path: str) -> np.array: + """Compute the euclidean distance between the center of mass of the atlas regions. + + Parameters + ---------- + atlas_path : str + Path to the atlas Nifti + Returns + ------- + np.array + Distance matrix + """ + from scipy.ndimage.measurements import center_of_mass + + logging.debug("Compute distance matrix from atlas centers of mass") + + atlas_img = nib.load(atlas_path) + atlas_data = atlas_img.get_fdata() + # Array to store the center of mass of each region + centroids = np.zeros((atlas_data.shape[3], 3), dtype=float) + for r in range(atlas_data.shape[3]): + centroids[r, ...] = np.array(center_of_mass(atlas_data[..., r])) + + # Compute Euclidean distance matrix using broadcasting + diff = centroids[:, np.newaxis, :] - centroids + distance_matrix = np.sqrt(np.sum(diff**2, axis=-1)) + + return distance_matrix + + +def group_reportlet_qc_fc_euclidean( + qc_fc_dict: dict, atlas_path: str, output: str +) -> None: + """Plot and save the correlations between QC-FC and euclidean distance. + The euclidean distance is computed from the centers of mass of each region. + + Parameters + ---------- + qc_fc_dict : dict + Dictionary containing the qc-fc distribution over the edges for different IQMs. + atlas_path : str + Path to the atlas Nifti + output : str + Path to the output directory + """ + d = compute_distance(atlas_path) + # Keep only upper triangle as the matrix is symmetric + upper_triangle_indices = np.triu_indices(d.shape[0], k=1) + d = d[upper_triangle_indices] + + # Iterate over the IQMs + fig, axs = plt.subplots(1, 3, figsize=FC_FIGURE_SIZE) + for i, iqm in enumerate(qc_fc_dict.keys()): + qc_fc = qc_fc_dict[iqm] + + logging.debug("Compute the correlation between QC-FC and euclidean distance.") + correlation, p_value = pearsonr(qc_fc, d) + + # Plot the box in red if the correlation is significant + facecolor = "red" if p_value < ALPHA else "grey" + axs[i].text( + 0.15, + 0.97, + f"Correlation = {correlation:.2f}, p-value = {p_value:.4f}", + fontsize=LABELSIZE - 2, + bbox=dict(facecolor=facecolor, alpha=0.4, boxstyle="round,pad=0.5"), + transform=axs[i].transAxes, + ) + axs[i].scatter(qc_fc, d) + + # Plot trend line + axs[i].plot( + np.unique(qc_fc), + np.poly1d(np.polyfit(qc_fc, d, 1))(np.unique(qc_fc)), + "r-", + linewidth=3, + ) + axs[i].tick_params(labelsize=LABELSIZE) + axs[i].set_title(iqm, fontsize=LABELSIZE + 2) + + fig.suptitle( + "Dependence between euclidean distance and QC-FC", fontsize=LABELSIZE + 4 + ) + fig.supxlabel("QC-FC correlation of edge between nodes", fontsize=LABELSIZE + 2) + fig.supylabel("Euclidean distance separating nodes", fontsize=LABELSIZE + 2) + # Ensure the labels are within the figure + # plt.tight_layout() + + savename = op.join("reportlets", "group_desc-qcfcvseuclidean_bold.svg") + + logging.debug("Saving QC-FC vs euclidean distance visual report at:") + logging.debug(f"\t{op.join(output, savename)}") + + plt.savefig(op.join(output, savename)) + plt.close() + +def group_report( + good_timepoints_df: pd.DataFrame, + fc_matrices: list[np.ndarray], + iqms_df: pd.DataFrame, + atlas_filename: str, + output: str, +) -> None: + """Generate a group report.""" + + # Generate each reportlets + group_report_censoring(good_timepoints_df, output) + group_reportlet_fc_dist(fc_matrices, output) + qc_fc_dict = group_reportlet_qc_fc(fc_matrices, iqms_df, output) + group_reportlet_qc_fc_euclidean(qc_fc_dict, atlas_filename, output) + + # Assemble reportlets into a single HTML report + logging.debug("Assemble the group report into a single HTML report.") + + run_uuid = "{}_{}".format(strftime("%Y%m%d-%H%M%S"), uuid4()) + robj = Report( + output, + run_uuid, + out_filename="group_report.html", + bootstrap_file=op.join("data", "reports-spec.yml"), + ) + robj.generate_report() \ No newline at end of file