From c2118196c410b3682bd51d8fcbc4f9750fdf2e71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Boris=20Cl=C3=A9net?= Date: Mon, 17 Jun 2024 15:34:57 +0200 Subject: [PATCH] Adding nuisance regressors in the group level analysis --- narps_open/pipelines/team_4SZ2.py | 256 +++++++----------------------- tests/pipelines/test_team_4SZ2.py | 41 ++--- 2 files changed, 70 insertions(+), 227 deletions(-) diff --git a/narps_open/pipelines/team_4SZ2.py b/narps_open/pipelines/team_4SZ2.py index 07ceed47..3a536225 100644 --- a/narps_open/pipelines/team_4SZ2.py +++ b/narps_open/pipelines/team_4SZ2.py @@ -6,6 +6,8 @@ from os.path import join from itertools import product +from numpy import array + from nipype import Workflow, Node, MapNode from nipype.interfaces.utility import IdentityInterface, Function, Split from nipype.interfaces.io import SelectFiles, DataSink @@ -15,12 +17,12 @@ FSLCommand, Cluster ) from nipype.algorithms.modelgen import SpecifyModel -from nipype.interfaces.fsl.maths import MultiImageMaths +from nipype.interfaces.fsl.maths import MathsCommand from narps_open.utils.configuration import Configuration from narps_open.pipelines import Pipeline from narps_open.data.task import TaskInformation -from narps_open.data.participants import get_group +from narps_open.data.participants import get_group, get_participants_information from narps_open.core.common import list_intersection, elements_in_string, clean_list from narps_open.core.interfaces import InterfaceFactory @@ -39,6 +41,11 @@ def __init__(self): ('effect_of_gain', 'T', ['gain', 'loss'], [1, 0]), ('effect_of_loss', 'T', ['gain', 'loss'], [0, 1]) ] + self.group_level_contrasts = [ + ('group_equal_indifference', 'T', ['equalIndifference', 'equalRange'], [1, 0]), + ('group_equal_range', 'T', ['equalIndifference', 'equalRange'], [0, 1]), + ('group_comp', 'T', ['equalIndifference', 'equalRange'], [-1, 1]) + ] def get_preprocessing(self): """ No preprocessing has been done by team 4SZ2 """ @@ -197,59 +204,45 @@ def get_subject_level_analysis(self): """ No subject level analysis has been done by team 4SZ2 """ return None - def get_one_sample_t_test_regressors(subject_list: list) -> dict: - """ - Create dictionary of regressors for one sample t-test group analysis. - - Parameters: - - subject_list: ids of subject in the group for which to do the analysis - - Returns: - - dict containing named lists of regressors. - """ - - return dict(group_mean = [1 for _ in subject_list]) - - def get_two_sample_t_test_regressors( - equal_range_ids: list, - equal_indifference_ids: list, - subject_list: list, - run_list: list - ) -> dict: + @staticmethod + def get_group_level_regressors(subject_list: list): """ Create dictionary of regressors for two sample t-test group analysis. Parameters: - - equal_range_ids: ids of subjects in equal range group - - equal_indifference_ids: ids of subjects in equal indifference group - subject_list: ids of subject for which to do the analysis - - run_list: ids of runs for which to do the analysis + Returns: - regressors, dict: containing named lists of regressors. - groups, list: group identifiers to distinguish groups in FSL analysis. """ - # Create 2 lists containing a value for each run, which is + # Create lists containing regressors for each group (equalRange, equalIndifference) # * 1 if the participant is on the group # * 0 otherwise - equal_range_regressors = [] - equal_indifference_regressors = [] - - for subject_id in subject_list: - value_er = 1 if subject_id in equal_range_ids else 0 - value_ei = 1 if subject_id in equal_indifference_ids else 0 - for _ in run_list: - equal_range_regressors.append(value_er) - equal_indifference_regressors.append(value_ei) + equal_range_group = get_group('equalRange') + equal_indif_group = get_group('equalIndifference') + equal_range_regressor = [1 if s in equal_range_group else 0 for s in subject_list] + equal_indif_regressor = [1 if s in equal_indif_group else 0 for s in subject_list] + + # Get gender and age of participants + participants_data = get_participants_information()[['participant_id', 'gender', 'age']] + participants = participants_data.loc[ + participants_data['participant_id'].isin([f'sub-{s}' for s in subject_list]) + ] + ages = array(participants['age']) + genders = array(participants['gender']) - # Create regressors output : a dict with the two list + # Create regressors output regressors = dict( - equalRange = equal_range_regressors, - equalIndifference = equal_indifference_regressors + equalIndifference = equal_indif_regressor, + equalRange = equal_range_regressor, + age = [int(a) for a in ages], + gender = [1 if i == 'F' else 0 for i in genders] ) - # Create groups outputs : a list with 1 for equalRange subjects and 2 for equalIndifference - groups = [1 if i == 1 else 2 for i in equal_range_regressors] + # Create groups outputs + groups = [1 if i == 1 else 2 for i in equal_range_regressor] return regressors, groups @@ -260,32 +253,13 @@ def get_group_level_analysis(self): Returns; - a list of nipype.WorkFlow """ - - methods = ['equalRange', 'equalIndifference', 'groupComp'] - return [self.get_group_level_analysis_sub_workflow(method) for method in methods] - - def get_group_level_analysis_sub_workflow(self, method): - """ - Return a workflow for the group level analysis. - - Parameters: - - method: one of 'equalRange', 'equalIndifference' or 'groupComp' - - Returns: - - group_level: nipype.WorkFlow - """ # Compute the number of participants in the analysis nb_subjects = len(self.subject_list) - # Compute the number of participants in the group - nb_subjects_in_group = nb_subjects - if method in ['equalIndifference', 'equalRange']: - nb_subjects_in_group = len([s for s in self.subject_list if s in get_group(method)]) - # Declare the workflow group_level = Workflow( base_dir = self.directories.working_dir, - name = f'group_level_analysis_{method}_nsub_{nb_subjects}') + name = f'group_level_analysis_nsub_{nb_subjects}') # Infosource Node - iterate over the contrasts generated by the subject level analysis information_source = Node(IdentityInterface( @@ -323,6 +297,7 @@ def get_group_level_analysis_sub_workflow(self, method): ), name = 'get_copes', iterfield = 'input_str' ) + get_copes.inputs.elements = complete_subject_ids(self.subject_list) group_level.connect(select_files, 'cope', get_copes, 'input_str') # Function Node elements_in_string @@ -336,6 +311,7 @@ def get_group_level_analysis_sub_workflow(self, method): ), name = 'get_varcopes', iterfield = 'input_str' ) + get_varcopes.inputs.elements = complete_subject_ids(self.subject_list) group_level.connect(select_files, 'varcope', get_varcopes, 'input_str') # Function Node elements_in_string @@ -349,6 +325,7 @@ def get_group_level_analysis_sub_workflow(self, method): ), name = 'get_masks', iterfield = 'input_str' ) + get_masks.inputs.elements = complete_subject_ids(self.subject_list) group_level.connect(select_files, 'masks', get_masks, 'input_str') # Merge Node - Merge cope files @@ -356,27 +333,29 @@ def get_group_level_analysis_sub_workflow(self, method): merge_copes.inputs.dimension = 't' group_level.connect(get_copes, ('out_list', clean_list), merge_copes, 'in_files') + # Merge Masks - Merge mask files + merge_masks = Node(Merge(), name = 'merge_masks') + merge_masks.inputs.dimension = 't' + group_level.connect(get_masks, ('out_list', clean_list), merge_masks, 'in_files') + # Merge Node - Merge cope files merge_varcopes = Node(Merge(), name = 'merge_varcopes') merge_varcopes.inputs.dimension = 't' group_level.connect(get_varcopes, ('out_list', clean_list), merge_varcopes, 'in_files') - # Split Node - Split mask list to serve them as inputs of the MultiImageMaths node. - split_masks = Node(Split(), name = 'split_masks') - split_masks.inputs.splits = [1, (nb_subjects_in_group * len(self.run_list)) - 1] - split_masks.inputs.squeeze = True # Unfold one-element splits removing the list - group_level.connect(get_masks, ('out_list', clean_list), split_masks, 'inlist') - - # MultiImageMaths Node - Create a subject mask by + # MathsCommand Node - Create a global mask by # computing the intersection of all run masks. - mask_intersection = Node(MultiImageMaths(), name = 'mask_intersection') - mask_intersection.inputs.op_string = '-mul %s ' * \ - ((nb_subjects_in_group * len(self.run_list)) - 1) - group_level.connect(split_masks, 'out1', mask_intersection, 'in_file') - group_level.connect(split_masks, 'out2', mask_intersection, 'operand_files') + mask_intersection = Node(MathsCommand(), name = 'mask_intersection') + mask_intersection.inputs.args = '-Tmin -thr 0.9' + group_level.connect(merge_masks, 'merged_file', mask_intersection, 'in_file') + + # Get regressors for the group level analysis + regressors, groups = self.get_group_level_regressors(self.subject_list) # MultipleRegressDesign Node - Specify model specify_model = Node(MultipleRegressDesign(), name = 'specify_model') + specify_model.inputs.regressors = regressors + specify_model.inputs.groups = groups # FLAMEO Node - Estimate model estimate_model = Node(FLAMEO(), name = 'estimate_model') @@ -404,123 +383,19 @@ def get_group_level_analysis_sub_workflow(self, method): data_sink = Node(DataSink(), name = 'data_sink') data_sink.inputs.base_directory = self.directories.output_dir group_level.connect(estimate_model, 'zstats', data_sink, - f'group_level_analysis_{method}_nsub_{nb_subjects}.@zstats') + f'group_level_analysis_nsub_{nb_subjects}.@zstats') group_level.connect(estimate_model, 'tstats', data_sink, - f'group_level_analysis_{method}_nsub_{nb_subjects}.@tstats') + f'group_level_analysis_nsub_{nb_subjects}.@tstats') group_level.connect(cluster,'threshold_file', data_sink, - f'group_level_analysis_{method}_nsub_{nb_subjects}.@threshold_file') - - if method in ('equalIndifference', 'equalRange'): - # Setup a one sample t-test - specify_model.inputs.contrasts = [ - ['group_mean', 'T', ['group_mean'], [1]], - ['group_mean_neg', 'T', ['group_mean'], [-1]] - ] - - # Function Node get_group_subjects - Get subjects in the group and in the subject_list - get_group_subjects = Node(Function( - function = list_intersection, - input_names = ['list_1', 'list_2'], - output_names = ['out_list'] - ), - name = 'get_group_subjects' - ) - get_group_subjects.inputs.list_1 = get_group(method) - get_group_subjects.inputs.list_2 = self.subject_list - group_level.connect( - get_group_subjects, ('out_list', complete_subject_ids), get_copes, 'elements') - group_level.connect( - get_group_subjects, ('out_list', complete_subject_ids), get_varcopes, 'elements') - group_level.connect( - get_group_subjects, ('out_list', complete_sub_ids), get_masks, 'elements') - - # Function Node get_one_sample_t_test_regressors - # Get regressors in the equalRange and equalIndifference method case - regressors_one_sample = Node( - Function( - function = self.get_one_sample_t_test_regressors, - input_names = ['subject_list'], - output_names = ['regressors'] - ), - name = 'regressors_one_sample', - ) - regressors_one_sample.inputs.subject_list = range( - nb_subjects_in_group * len(self.run_list)) - group_level.connect(regressors_one_sample, 'regressors', specify_model, 'regressors') - - elif method == 'groupComp': - - # Select copes and varcopes corresponding to the selected subjects - # Indeed the SelectFiles node asks for all (*) subjects available - get_copes.inputs.elements = complete_subject_ids(self.subject_list) - get_varcopes.inputs.elements = complete_subject_ids(self.subject_list) - get_masks.inputs.elements = complete_sub_ids(self.subject_list) - - # Setup a two sample t-test - specify_model.inputs.contrasts = [ - ['equalRange_sup', 'T', ['equalRange', 'equalIndifference'], [1, -1]] - ] - - # Function Node get_equal_range_subjects - # Get subjects in the equalRange group and in the subject_list - get_equal_range_subjects = Node(Function( - function = list_intersection, - input_names = ['list_1', 'list_2'], - output_names = ['out_list'] - ), - name = 'get_equal_range_subjects' - ) - get_equal_range_subjects.inputs.list_1 = get_group('equalRange') - get_equal_range_subjects.inputs.list_2 = self.subject_list - - # Function Node get_equal_indifference_subjects - # Get subjects in the equalIndifference group and in the subject_list - get_equal_indifference_subjects = Node(Function( - function = list_intersection, - input_names = ['list_1', 'list_2'], - output_names = ['out_list'] - ), - name = 'get_equal_indifference_subjects' - ) - get_equal_indifference_subjects.inputs.list_1 = get_group('equalIndifference') - get_equal_indifference_subjects.inputs.list_2 = self.subject_list - - # Function Node get_two_sample_t_test_regressors - # Get regressors in the groupComp method case - regressors_two_sample = Node( - Function( - function = self.get_two_sample_t_test_regressors, - input_names = [ - 'equal_range_ids', - 'equal_indifference_ids', - 'subject_list', - 'run_list' - ], - output_names = ['regressors', 'groups'] - ), - name = 'regressors_two_sample', - ) - regressors_two_sample.inputs.subject_list = self.subject_list - regressors_two_sample.inputs.run_list = self.run_list - - # Add missing connections - group_level.connect( - get_equal_range_subjects, 'out_list', regressors_two_sample, 'equal_range_ids') - group_level.connect( - get_equal_indifference_subjects, 'out_list', - regressors_two_sample, 'equal_indifference_ids') - group_level.connect(regressors_two_sample, 'regressors', specify_model, 'regressors') - group_level.connect(regressors_two_sample, 'groups', specify_model, 'groups') + f'group_level_analysis_nsub_{nb_subjects}.@threshold_file') return group_level def get_group_level_outputs(self): """ Return all names for the files the group level analysis is supposed to generate. """ - # Handle equalRange and equalIndifference parameters = { 'contrast_id': self.contrast_list, - 'method': ['equalRange', 'equalIndifference'], 'file': [ '_cluster0/zstat1_threshold.nii.gz', '_cluster1/zstat2_threshold.nii.gz', @@ -533,34 +408,13 @@ def get_group_level_outputs(self): parameter_sets = product(*parameters.values()) template = join( self.directories.output_dir, - 'group_level_analysis_{method}_nsub_'+f'{len(self.subject_list)}', + 'group_level_analysis_nsub_'+f'{len(self.subject_list)}', '_contrast_id_{contrast_id}', '{file}' ) - return_list = [template.format(**dict(zip(parameters.keys(), parameter_values)))\ - for parameter_values in parameter_sets] - - # Handle groupComp - parameters = { - 'contrast_id': self.contrast_list, - 'file': [ - '_cluster0/zstat1_threshold.nii.gz', - 'tstat1.nii.gz', - 'zstat1.nii.gz' - ] - } - parameter_sets = product(*parameters.values()) - template = join( - self.directories.output_dir, - f'group_level_analysis_groupComp_nsub_{len(self.subject_list)}', - '_contrast_id_{contrast_id}', - '{file}' - ) - return_list += [template.format(**dict(zip(parameters.keys(), parameter_values)))\ + return [template.format(**dict(zip(parameters.keys(), parameter_values)))\ for parameter_values in parameter_sets] - return return_list - def get_hypotheses_outputs(self): """ Return all hypotheses output file names. """ diff --git a/tests/pipelines/test_team_4SZ2.py b/tests/pipelines/test_team_4SZ2.py index 1f3c08ff..8b025db0 100644 --- a/tests/pipelines/test_team_4SZ2.py +++ b/tests/pipelines/test_team_4SZ2.py @@ -38,10 +38,7 @@ def test_create(): assert pipeline.get_preprocessing() is None assert isinstance(pipeline.get_run_level_analysis(), Workflow) assert pipeline.get_subject_level_analysis() is None - group_level = pipeline.get_group_level_analysis() - assert len(group_level) == 3 - for sub_workflow in group_level: - assert isinstance(sub_workflow, Workflow) + assert isinstance(pipeline.get_group_level_analysis(), Workflow) @staticmethod @mark.unit_test @@ -88,28 +85,20 @@ def test_subject_information(): @staticmethod @mark.unit_test - def test_one_sample_t_test_regressors(): - """ Test the get_one_sample_t_test_regressors method """ - - result = PipelineTeam4SZ2.get_one_sample_t_test_regressors(['001', '002', '003', '004']) - assert result == {'group_mean' : [1]*4} - - @staticmethod - @mark.unit_test - def test_two_sample_t_test_regressors(): - """ Test the get_two_sample_t_test_regressors method """ - - result_1, result_2 = PipelineTeam4SZ2.get_two_sample_t_test_regressors( - ['001', '003'], # equal_range_ids - ['002', '004'], # equal_indifference_ids - ['001', '002', '003', '004'], # subject_list - ['01', '02'] # run_list - ) - assert result_1 == { - 'equalRange' : [1, 1, 0, 0, 1, 1, 0, 0], - 'equalIndifference' : [0, 0, 1, 1, 0, 0, 1, 1] - } - assert result_2 == [1, 1, 2, 2, 1, 1, 2, 2] + def test_get_group_level_regressors(): + """ Test the get_group_level_regressors method """ + + regressors, groups = PipelineTeam4SZ2.get_group_level_regressors(['001', '002', '003', '004']) + + for k1, k2 in zip(regressors.keys(), ['equalIndifference', 'equalRange', 'age', 'gender']): + assert k1 == k2 + assert regressors['equalIndifference'] == [1, 0, 1, 0] + assert regressors['equalRange'] == [0, 1, 0, 1] + print(regressors['age']) + print(regressors['gender']) + assert regressors['age'] == [24, 25, 27, 25] + assert regressors['gender'] == [0, 0, 1, 0] + assert groups == [2, 1, 2, 1] @staticmethod @mark.pipeline_test