Skip to content

Commit

Permalink
Merge pull request #316 from CoBrALab/id_flagging
Browse files Browse the repository at this point in the history
Id flagging
  • Loading branch information
Gab-D-G authored Sep 26, 2023
2 parents 2d3f0b2 + a7262d7 commit 6d54e97
Show file tree
Hide file tree
Showing 9 changed files with 269 additions and 147 deletions.
27 changes: 25 additions & 2 deletions rabies/analysis_pkg/main_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,18 @@ def init_main_analysis_wf(preprocess_opts, cr_opts, analysis_opts):

split_dict, split_name_list, target_list = read_confound_workflow(conf_output, nativespace=cr_opts.nativespace_analysis)

# update split_name according to the --scan_list option
split_name_list = get_iterable_scan_list(analysis_opts.scan_list, split_name_list)
if len(split_name_list)==0:
raise ValueError(f"""
No outputs were founds from the confound correction stage.
All scans may have been removed for not meeting the minimum_timepoint threshold
when applying --frame_censoring. Outputs will be named empty.nii.gz if this is
the case.
""")

# filter inclusion/exclusion lists
from rabies.utils import filter_scan_inclusion, filter_scan_exclusion
split_name_list = filter_scan_inclusion(analysis_opts.inclusion_ids, split_name_list)
split_name_list = filter_scan_exclusion(analysis_opts.exclusion_ids, split_name_list)

# setting up iterables from the BOLD scan splits
main_split = pe.Node(niu.IdentityInterface(fields=['split_name']),
Expand Down Expand Up @@ -310,6 +320,8 @@ def load_sub_input_dict(maps_dict, bold_file, CR_data_dict, VE_file, STD_file, C


def read_confound_workflow(conf_output, nativespace=False):
from nipype import logging
log = logging.getLogger('nipype.workflow')

conf_workflow_file = f'{conf_output}/rabies_confound_correction_workflow.pkl'

Expand Down Expand Up @@ -358,15 +370,26 @@ def read_confound_workflow(conf_output, nativespace=False):

# don't include scans that were removed during confound correction
corrected_split_name=[]
remove_list = []
import pathlib
for name in split_name:
filename = pathlib.Path(split_dict[name]['cleaned_path']).name
if 'empty' in filename:
remove_list.append(name)
del split_dict[name]
else:
corrected_split_name.append(name)
split_name = corrected_split_name

if len(remove_list)>0:
scan_list_str = ''
for name in remove_list:
scan_list_str += f'\n - {name}'
log.warning(f"""
The following scans were not included for analysis as the file was empty: {scan_list_str}
This is likely due to not meeting the minimum_timepoints threshold from --frame_censoring.
""")

return split_dict, split_name, target_list


Expand Down
5 changes: 5 additions & 0 deletions rabies/confound_correction_pkg/main_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ def init_main_confound_correction_wf(preprocess_opts, cr_opts):
else:
split_dict, split_name, target_list = read_preproc_workflow(preproc_output, nativespace=cr_opts.nativespace_analysis)

# filter inclusion/exclusion lists
from rabies.utils import filter_scan_inclusion, filter_scan_exclusion
split_name = filter_scan_inclusion(cr_opts.inclusion_ids, split_name)
split_name = filter_scan_exclusion(cr_opts.exclusion_ids, split_name)

# setting up iterables from the BOLD scan splits
main_split = pe.Node(niu.IdentityInterface(fields=['split_name']),
name="main_split")
Expand Down
42 changes: 40 additions & 2 deletions rabies/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,33 @@ def get_parser():
description=
"Options for parallel execution and memory management."
)
g_execution.add_argument(
'--inclusion_ids', type=str,
nargs="*", # 0 or more values expected => creates a list
default=['all'],
help=
"Define a list of BOLD scan to include, i.e. run the pipeline on a subset of the data. \n"
"To do so, provide the full path to the corresponding BOLD file in the input BIDS folder. The list \n"
"of scan can be specified manually as a list of file name '--scan_list scan1.nii.gz \n"
"scan2.nii.gz ...' or the files can be imbedded into a .txt file with one filename per row.\n"
"By default, 'all' the scans found in the input BIDS directory or from the previous \n"
"processing step. This can be provided at any processing stage.\n"
"***NOTE: do not enter this parameter right before the processing stage (preprocess, etc...), this will cause \n"
"parsing errors. Instead, provide another parameter after --inclusion_ids (e.g. --verbose or -p). \n"
"(default: %(default)s)\n"
"\n"
)
g_execution.add_argument(
'--exclusion_ids', type=str,
nargs="*", # 0 or more values expected => creates a list
default=['none'],
help=
"Instead of providing a list of scans to include, this argument provides a list of scans to exclude (while \n"
"keeping all other scans). This argument follows the same syntax rules as --includion_ids. --exclusion_ids \n"
"and --inclusion_ids cannot be used simultaneously. \n"
"(default: %(default)s)\n"
"\n"
)
g_execution.add_argument(
"-p", "--plugin", default='Linear',
choices=['Linear', 'MultiProc', 'SGE', 'SGEGraph',
Expand Down Expand Up @@ -129,6 +156,14 @@ def get_parser():
"(default: %(default)s)\n"
"\n"
)
g_execution.add_argument(
"-f", "--force", dest='force', action='store_true',
help=
"The pipeline will not stop if previous outputs are encountered. \n"
"Previous outputs will be overwritten.\n"
"(default: %(default)s)\n"
"\n"
)


####Preprocessing
Expand Down Expand Up @@ -958,8 +993,11 @@ def get_parser():
return parser


def read_parser(parser):
opts = parser.parse_args()
def read_parser(parser, args):
if args is None:
opts = parser.parse_args()
else:
opts = parser.parse_args(args)

if opts.rabies_stage == 'preprocess':
opts.anat_inho_cor = parse_argument(opt=opts.anat_inho_cor,
Expand Down
2 changes: 1 addition & 1 deletion rabies/preprocess_pkg/main_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def init_main_wf(data_dir_path, output_folder, opts, name='main_wf'):
bids.config.set_option('extension_initial_dot', True)
layout = bids.layout.BIDSLayout(data_dir_path, validate=False)
split_name, scan_info, run_iter, scan_list, bold_scan_list = prep_bids_iter(
layout, opts.bold_only)
layout, opts.bold_only, inclusion_list=opts.inclusion_ids, exclusion_list=opts.exclusion_ids)

# setting up all iterables
main_split = pe.Node(niu.IdentityInterface(fields=['split_name', 'scan_info']),
Expand Down
15 changes: 14 additions & 1 deletion rabies/preprocess_pkg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from rabies.utils import run_command

def prep_bids_iter(layout, bold_only=False):
def prep_bids_iter(layout, bold_only=False, inclusion_list=['all'], exclusion_list=['none']):
'''
This function takes as input a BIDSLayout, and generates iteration lists
for managing the workflow's iterables depending on whether --bold_only is
Expand Down Expand Up @@ -38,6 +38,19 @@ def prep_bids_iter(layout, bold_only=False):
raise ValueError(
"No functional file with the suffix 'bold' were found among the BIDS directory.")

# filter inclusion/exclusion lists
from rabies.utils import filter_scan_inclusion, filter_scan_exclusion
boldname_list=[pathlib.Path(bold.filename).name.rsplit(".nii")[0] for bold in bold_bids]
updated_split_name = filter_scan_inclusion(inclusion_list, boldname_list)
updated_split_name = filter_scan_exclusion(exclusion_list, updated_split_name)

filtered_bold_bids=[]
for name in updated_split_name:
for bold in bold_bids:
if name in bold.filename:
filtered_bold_bids.append(bold)
bold_bids = filtered_bold_bids

bold_dict = {}
for bold in bold_bids:
sub = bold.get_entities()['subject']
Expand Down
18 changes: 13 additions & 5 deletions rabies/run_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
rabies_path = os.environ['HOME']+'/.local/share/rabies'


def execute_workflow():
def execute_workflow(args=None):
# generates the parser CLI and execute the workflow based on specified parameters.
parser = get_parser()
opts = read_parser(parser)
opts = read_parser(parser, args)

try: # convert the output path to absolute if not already the case
opts.output_dir = os.path.abspath(str(opts.output_dir))
Expand All @@ -40,6 +40,13 @@ def execute_workflow():
args += input
log.info(args)

# inclusion/exclusion list are incompatible parameters
if (not opts.inclusion_ids[0]=='all') and (not opts.exclusion_ids[0]=='none'):
raise ValueError(f"""
Either an inclusion list (--inclusion_ids) or exclusion list (--exclusion_ids)
can be provided, not both.
""")

if opts.rabies_stage == 'preprocess':
workflow = preprocess(opts, log)
elif opts.rabies_stage == 'confound_correction':
Expand Down Expand Up @@ -72,12 +79,13 @@ def execute_workflow():

def prep_logging(opts, output_folder):
cli_file = f'{output_folder}/rabies_{opts.rabies_stage}.pkl'
if os.path.isfile(cli_file):
if os.path.isfile(cli_file) and not opts.force:
raise ValueError(f"""
A previous run was indicated by the presence of {cli_file}.
This can lead to inconsistencies between previous outputs and the log files.
To prevent this, you are required to manually remove {cli_file}, and we
recommend also removing previous datasinks from the {opts.rabies_stage} RABIES step.
To prevent this, we recommend removing previous datasinks from the {opts.rabies_stage}
RABIES stage. To continue with your execution, the {cli_file} file must be
removed (use --force to automatically do so).
""")

# remove old versions of the log if already existing
Expand Down
115 changes: 115 additions & 0 deletions rabies/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,71 @@ def flatten_list(l):
return l


def filter_scan_exclusion(exclusion_list, split_name):
# the function removes a list of scan IDs from split_name

# exclusion_list: the input provided by the user
# split_name: a list of all scan IDs that were found

import numpy as np
import pandas as pd
if os.path.isfile(os.path.abspath(exclusion_list[0])):
updated_split_name=[]
if not '.nii' in pathlib.Path(exclusion_list[0]).name:
# read the file as a .txt
exclusion_list = np.array(pd.read_csv(os.path.abspath(exclusion_list[0]), header=None)).flatten()
for split in split_name:
exclude = False
for scan in exclusion_list:
if split in scan:
exclude = True
if not exclude:
updated_split_name.append(split)
elif exclusion_list[0]=='none':
updated_split_name = split_name
else:
raise ValueError(f"The --exclusion_ids {exclusion_list} input had improper format. It must the full path to a .txt or .nii files.")

if len(updated_split_name)==0:
raise ValueError(f"""
No scans are left after scan exclusion!
""")

return updated_split_name


def filter_scan_inclusion(inclusion_list, split_name):
# the function will update the list of scan IDs in split_name to correspond to inclusion/exclusion list

# inclusion_list: the input provided by the user
# split_name: a list of all scan IDs that were found

import numpy as np
import pandas as pd
if os.path.isfile(os.path.abspath(inclusion_list[0])):
updated_split_name=[]
if '.nii' in pathlib.Path(inclusion_list[0]).name:
for scan in inclusion_list:
updated_split_name.append(find_split(scan, split_name))
else:
# read the file as a .txt
inclusion_list = np.array(pd.read_csv(os.path.abspath(inclusion_list[0]), header=None)).flatten()
for scan in inclusion_list:
updated_split_name.append(find_split(scan, split_name))
elif inclusion_list[0]=='all':
updated_split_name = split_name
else:
raise ValueError(f"The --inclusion_ids {inclusion_list} input had improper format. It must the full path to a .txt or .nii files, or 'all' to keep all scans.")
return updated_split_name


def find_split(scan, split_name):
for split in split_name:
if split in scan:
return split
raise ValueError(f"No previous file name is matching {scan}")


######################
#FUNCTIONS TO READ WORKFLOW GRAPH
######################
Expand Down Expand Up @@ -437,3 +502,53 @@ def fill_node_dict(d, key_l, e):
return d
else:
return e


######################
#DEBUGGING
######################

def generate_token_data(tmppath, number_scans):
# this function generates fake scans at low resolution for quick testing and debugging

os.makedirs(tmppath+'/inputs', exist_ok=True)

if 'XDG_DATA_HOME' in os.environ.keys():
rabies_path = os.environ['XDG_DATA_HOME']+'/rabies'
else:
rabies_path = os.environ['HOME']+'/.local/share/rabies'

template = f"{rabies_path}/DSURQE_40micron_average.nii.gz"
mask = f"{rabies_path}/DSURQE_40micron_mask.nii.gz"

spacing = (float(1), float(1), float(1)) # resample to 1mmx1mmx1mm
resampled_template = resample_image_spacing(sitk.ReadImage(template), spacing)
# generate template masks
resampled_mask = resample_image_spacing(sitk.ReadImage(mask), spacing)
array = sitk.GetArrayFromImage(resampled_mask)
array[array < 1] = 0
array[array > 1] = 1
binarized = sitk.GetImageFromArray(array, isVector=False)
binarized.CopyInformation(resampled_mask)
sitk.WriteImage(binarized, tmppath+'/inputs/token_mask.nii.gz')
array[:, :, :6] = 0
binarized = sitk.GetImageFromArray(array, isVector=False)
binarized.CopyInformation(resampled_mask)
sitk.WriteImage(binarized, tmppath+'/inputs/token_mask_half.nii.gz')

# generate fake scans from the template
array = sitk.GetArrayFromImage(resampled_template)
array_4d = np.repeat(array[np.newaxis, :, :, :], 15, axis=0)

for i in range(number_scans):
# generate anatomical scan
sitk.WriteImage(resampled_template, tmppath+f'/inputs/sub-token{i+1}_T1w.nii.gz')
# generate functional scan
array_4d_ = array_4d + np.random.normal(0, array_4d.mean()
/ 100, array_4d.shape) # add gaussian noise
sitk.WriteImage(sitk.GetImageFromArray(array_4d_, isVector=False),
tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz')

# necessary to read matrix orientation properly at the analysis stage
sitk.WriteImage(copyInfo_4DImage(sitk.ReadImage(tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz'), sitk.ReadImage(tmppath
+ f'/inputs/sub-token{i+1}_T1w.nii.gz'), sitk.ReadImage(tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz')), tmppath+f'/inputs/sub-token{i+1}_bold.nii.gz')
Loading

0 comments on commit 6d54e97

Please sign in to comment.