diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b04600a --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +sandbox diff --git a/Dockerfile b/Dockerfile index 1f2e1ce..370cc9d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,17 +12,17 @@ COPY manifest.json ${FLYWHEEL}/manifest.json ENTRYPOINT ["/flywheel/v0/fw_heudiconv_run.py"] # Copy over python scripts that generate the BIDS hierarchy -RUN apt-get -y update -RUN apt-get install -y zip -RUN pip install --no-cache heudiconv flywheel-sdk pandas +RUN apt-get -y update && apt-get install -y curl +RUN curl -sL https://deb.nodesource.com/setup_10.x | bash +RUN apt-get -y update && apt-get install -y zip nodejs +RUN npm install -g bids-validator +RUN pip install --no-cache heudiconv nipype flywheel-sdk pandas + COPY . /src + RUN cd /src \ && pip install . \ - && pip install --no-cache --no-deps heudiconv \ - && pip install --no-cache flywheel-sdk \ - && pip install --no-cache nipype \ - && rm -rf /src \ - && apt-get install -y --no-install-recommends zip + && rm -rf /src COPY fw_heudiconv_run.py /flywheel/v0/fw_heudiconv_run.py RUN chmod +x ${FLYWHEEL}/* diff --git a/fw_heudiconv/cli/clear.py b/fw_heudiconv/cli/clear.py new file mode 100644 index 0000000..75c104b --- /dev/null +++ b/fw_heudiconv/cli/clear.py @@ -0,0 +1,170 @@ +import argparse +import flywheel +import logging +import warnings +import sys + +with warnings.catch_warnings(): + warnings.simplefilter('ignore') + from fw_heudiconv.cli.export import get_nested + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('fw-heudiconv-clearer') + + +def clear_bids(client, project_label, session_labels=None, subject_labels=None, dry_run=False, file_types = ['.nii', '.bval', '.bvec']): + + logger.info("Querying Flywheel server...") + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + project_obj = client.projects.find_first('label="{}"'.format(project_label)) + + if project_obj is None: + 
logger.error("Project not found! Maybe check spelling...?") + return 1 + + logger.debug('\tFound project: \n\t\t%s (%s)', project_obj['label'], project_obj.id) + sessions = client.get_project_sessions(project_obj.id) + + # filters + if subject_labels: + sessions = [s for s in sessions if s.subject['label'] in subject_labels] + if session_labels: + sessions = [s for s in sessions if s.label in session_labels] + + if not sessions: + logger.error("No sessions found!") + return 1 + + logger.info('\tFound subjects:\n\t\t%s', + "\n\t\t".join(set(['%s (%s)' % (ses.subject.label, ses.subject.id) for ses in sessions]))) + + logger.info('\tFound sessions:\n\t\t%s', + "\n\t\t".join(['%s (%s)' % (ses['label'], ses.id) for ses in sessions])) + + file_list = [] + for ses in sessions: + + acquisitions = ses.acquisitions() + + for acq in acquisitions: + + files = [f.to_dict() for f in acq.files if any([x in f.name for x in file_types])] + + files = [f for f in files if get_nested(f, 'info', 'BIDS') != 'NA' and get_nested(f, 'info', 'BIDS') is not None and get_nested(f, 'info', 'BIDS', 'Filename') != ''] + + if files: + file_list.append({acq.id: files}) + + fnames = [] + for x in file_list: + for k, v in x.items(): + for u in v: + name = get_nested(u, 'info', 'BIDS', 'Filename') + if name is not None: + fnames.append(name) + + if file_list: + logger.debug("This will remove BIDS data from %d files:\n\t%s" % (len(file_list), "\n\t".join([x for x in fnames]))) + + + if not dry_run: + logger.info('\t\t=======: Removing BIDS data :=======\n') + + for acq_files in file_list: + + for k, v in acq_files.items(): + acq = client.get(k) + + for fi in v: + + BIDS = get_nested(fi, 'info', 'BIDS') + new_bids = {k:'' for k,v in BIDS.items()} + acq.update_file_info(fi['name'], {'BIDS': new_bids}) + + else: + logger.info("Disable `dry_run` mode to apply these changes and remove the BIDS information.") + + else: + logger.info("No BIDS data to remove! 
(That was easy...)") + + return 0 + + +def get_parser(): + + parser = argparse.ArgumentParser( + description="Go nuclear: clear BIDS data from Flywheel") + parser.add_argument( + "--project", + help="The project in flywheel", + nargs="+", + required=True + ) + parser.add_argument( + "--subject", + help="The subject label(s)", + nargs="+", + default=None + ) + parser.add_argument( + "--session", + help="The session label(s)", + nargs="+", + default=None + ) + parser.add_argument( + "--verbose", + help="Print ongoing messages of progress", + action='store_true', + default=False + ) + parser.add_argument( + "--dry-run", + help="Don't apply changes", + action='store_true', + default=False + ) + parser.add_argument( + "--api-key", + help="API Key", + action='store', + default=None + ) + + return parser + + +def main(): + + logger.info("{:=^70}\n".format(": fw-heudiconv clearer starting up :")) + parser = get_parser() + args = parser.parse_args() + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + if args.api_key: + fw = flywheel.Client(args.api_key) + else: + fw = flywheel.Client() + assert fw, "Your Flywheel CLI credentials aren't set!" 
+ + # Print a lot if requested + if args.verbose: + logger.setLevel(logging.DEBUG) + + project_label = ' '.join(args.project) + status = clear_bids(client=fw, + project_label=project_label, + session_labels=args.session, + subject_labels=args.subject, + dry_run=args.dry_run) + + logger.info("Done!") + logger.info("{:=^70}".format(": Exiting fw-heudiconv clearer :")) + sys.exit(status) + +if __name__ == '__main__': + main() diff --git a/fw_heudiconv/cli/curate.py b/fw_heudiconv/cli/curate.py index e84887f..ffa6444 100644 --- a/fw_heudiconv/cli/curate.py +++ b/fw_heudiconv/cli/curate.py @@ -4,15 +4,16 @@ import argparse import warnings import flywheel +import pprint from collections import defaultdict from ..convert import apply_heuristic, confirm_intentions, confirm_bids_namespace from ..query import get_seq_info from heudiconv import utils -from heudiconv import heuristics import logging logging.basicConfig(level=logging.INFO) -logger = logging.getLogger('fwHeuDiConv-curator') +logger = logging.getLogger('fw-heudiconv-curator') + def pretty_string_seqinfo(seqinfo): tr = seqinfo.TR if seqinfo.TR is not None else -1.0 @@ -86,6 +87,7 @@ def convert_to_bids(client, project_label, heuristic_path, subject_labels=None, logger.info("Applying heuristic to query results...") to_rename = heuristic.infotodict(seq_infos) + print(to_rename) if not to_rename: logger.debug("No changes to apply!") sys.exit(1) @@ -94,7 +96,7 @@ def convert_to_bids(client, project_label, heuristic_path, subject_labels=None, if hasattr(heuristic, "IntendedFor"): logger.info("Processing IntendedFor fields based on heuristic file") intention_map.update(heuristic.IntendedFor) - logger.debug("Intention map: %s", intention_map) + logger.debug("Intention map: %s", pprint.pformat([(k[0], v) for k, v in dict(intention_map).items()])) metadata_extras = defaultdict(list) if hasattr(heuristic, "MetadataExtras"): @@ -123,18 +125,17 @@ def convert_to_bids(client, project_label, heuristic_path, subject_labels=None, 
apply_heuristic(client, key, value, dry_run, intention_map[key], metadata_extras[key], subject_rename, session_rename, seqitem+1) - if not dry_run: - for ses in sessions: - confirm_intentions(client, ses) + for ses in sessions: + confirm_intentions(client, ses, dry_run) + def get_parser(): parser = argparse.ArgumentParser( - description="Use a heudiconv heuristic to curate bids on flywheel") + description="Use a heudiconv heuristic to curate data into BIDS on flywheel") parser.add_argument( "--project", help="The project in flywheel", - nargs="+", required=True ) parser.add_argument( @@ -161,35 +162,51 @@ def get_parser(): default=False ) parser.add_argument( - "--dry_run", + "--dry-run", help="Don't apply changes", action='store_true', default=False ) + parser.add_argument( + "--api-key", + help="API Key", + action='store', + default=None + ) return parser def main(): + + logger.info("{:=^70}\n".format(": fw-heudiconv curator starting up :")) + + parser = get_parser() + args = parser.parse_args() + with warnings.catch_warnings(): warnings.simplefilter("ignore") - fw = flywheel.Client() + if args.api_key: + fw = flywheel.Flywheel(args.api_key) + else: + fw = flywheel.Client() assert fw, "Your Flywheel CLI credentials aren't set!" 
- parser = get_parser() - args = parser.parse_args() # Print a lot if requested if args.verbose: logger.setLevel(logging.DEBUG) - project_label = ' '.join(args.project) convert_to_bids(client=fw, - project_label=project_label, + project_label=args.project, heuristic_path=args.heuristic, session_labels=args.session, subject_labels=args.subject, dry_run=args.dry_run) + logger.info("Done!") + logger.info("{:=^70}".format(": Exiting fw-heudiconv curator :")) + sys.exit() + if __name__ == '__main__': main() diff --git a/fw_heudiconv/cli/export.py b/fw_heudiconv/cli/export.py index 89af40f..15d6796 100644 --- a/fw_heudiconv/cli/export.py +++ b/fw_heudiconv/cli/export.py @@ -7,6 +7,7 @@ import shutil import re import csv +import pandas as pd from pathlib import Path from ..query import print_directory_tree @@ -78,13 +79,15 @@ def gather_bids(client, project_label, subject_labels=None, session_labels=None) 'name': container.filename, 'path': path, 'type': type of file, - 'data': container} + 'data': container.id + } ''' logger.info("Gathering bids data:") to_download = { 'dataset_description': [], 'project': [], + 'subject': [], 'session': [], 'acquisition': [] } @@ -106,7 +109,8 @@ def gather_bids(client, project_label, subject_labels=None, session_labels=None) d = { 'name': pf.name, 'type': 'attachment', - 'data': project_obj.id + 'data': project_obj.id, + 'BIDS': get_nested(pf, 'info', 'BIDS') } to_download['project'].append(d) @@ -150,13 +154,13 @@ def gather_bids(client, project_label, subject_labels=None, session_labels=None) return to_download -def download_bids(client, to_download, root_path, folders_to_download = ['anat', 'dwi', 'func', 'fmap'], dry_run=True): +def download_bids(client, to_download, root_path, folders_to_download = ['anat', 'dwi', 'func', 'fmap'], dry_run=True, name='bids_dataset'): if dry_run: logger.info("Preparing output directory tree...") else: logger.info("Downloading files...") - root_path = "/".join([root_path, "bids_dataset"]) + root_path 
= "/".join([root_path, name]) Path(root_path).mkdir() # handle dataset description if to_download['dataset_description']: @@ -173,7 +177,7 @@ def download_bids(client, to_download, root_path, folders_to_download = ['anat', if not any(x['name'] == '.bidsignore' for x in to_download['project']): # write bids ignore path = "/".join([root_path, ".bidsignore"]) - ignored_modalities = ['asl/\n', 'qsm/\n'] + ignored_modalities = ['asl/\n', 'qsm/\n', '*.bval\n', '*.bvec\n'] if dry_run: Path(path).touch() else: @@ -181,13 +185,26 @@ def download_bids(client, to_download, root_path, folders_to_download = ['anat', bidsignore.writelines(ignored_modalities) # deal with project level files - # NOT YET IMPLEMENTED + # Project's subject data for fi in to_download['project']: - pass + + project_path = get_nested(fi, 'BIDS', 'Path') + folder = get_nested(fi, 'BIDS', 'Folder') + ignore = get_nested(fi, 'BIDS', 'ignore') + + if project_path \ + and folder in folders_to_download \ + and not ignore \ + and any(fi['name'] == 'participants.tsv' or fi['name'] == 'participants.json'): + + proj = client.get(fi['data']) #download_path = get_metadata(fi, ['BIDS', 'Path']) #if download_path: # print('/'.join([root_path, download_path, fi['name']])) + #proj.download_file(fi['name'], file_path) + #download_sidecar(fi['sidecar'], sidecar_path, remove_bids=True) + # deal with session level files # NOT YET IMPLEMENTED for fi in to_download['session']: @@ -246,49 +263,69 @@ def download_bids(client, to_download, root_path, folders_to_download = ['anat', logger.info("Done!") print_directory_tree(root_path) - if dry_run: - shutil.rmtree(root_path) def get_parser(): parser = argparse.ArgumentParser( - description="Export BIDS compliant data") + description="Export BIDS-curated data from Flywheel") parser.add_argument( "--project", help="The project in flywheel", - nargs="+", required=True ) parser.add_argument( "--path", - help="The target directory to download", - required=True, - default="." 
+ help="The target directory to download [DEPRECATED. PLEASE USE --destination INSTEAD]", + default=None ) parser.add_argument( "--subject", - help="The subject to curate", + help="The subject(s) to export", nargs="+", default=None, type=str ) parser.add_argument( "--session", - help="The session to curate", + help="The session(s) to export", nargs="+", default=None, type=str ) parser.add_argument( "--folders", - help="The BIDS folders to download", + help="The BIDS folders to export", nargs="+", default=['anat', 'dwi', 'fmap', 'func'] ) parser.add_argument( - "--dry_run", - help="Don't apply changes", + "--dry-run", + help="Don't apply changes (only print the directory tree to the console)", action='store_true', default=False ) + parser.add_argument( + "--destination", + help="Path to destination directory", + default=".", + type=str + ) + parser.add_argument( + "--directory-name", + help="Name of destination directory", + default="bids_directory", + type=str + ) + parser.add_argument( + "--api-key", + help="API Key", + action='store', + default=None + ) + parser.add_argument( + "--verbose", + help="Print ongoing messages of progress", action='store_true', default=False ) @@ -298,22 +335,40 @@ def get_parser(): def main(): + logger.info("{:=^70}\n".format(": fw-heudiconv exporter starting up :")) + parser = get_parser() + args = parser.parse_args() + with warnings.catch_warnings(): warnings.simplefilter("ignore") - fw = flywheel.Client() + if args.api_key: + fw = flywheel.Client(args.api_key) + else: + fw = flywheel.Client() assert fw, "Your Flywheel CLI credentials aren't set!" - parser = get_parser() - args = parser.parse_args() - project_label = ' '.join(args.project) - assert os.path.exists(args.path), "Path does not exist!" 
+ if args.path: + destination = args.path + else: + destination = args.destination + + if not os.path.exists(destination): + logger.info("Creating destination directory...") + os.makedirs(args.destination) + downloads = gather_bids(client=fw, - project_label=project_label, + project_label=args.project, session_labels=args.session, - subject_labels=args.subject) + subject_labels=args.subject + ) - download_bids(client=fw, to_download=downloads, root_path=args.path, folders_to_download=args.folders, dry_run=args.dry_run) + download_bids(client=fw, to_download=downloads, root_path=destination, folders_to_download=args.folders, dry_run=args.dry_run, name=args.directory_name) + if args.dry_run: + shutil.rmtree(Path(args.destination, args.directory_name)) + + logger.info("Done!") + logger.info("{:=^70}".format(": Exiting fw-heudiconv exporter :")) if __name__ == '__main__': main() diff --git a/fw_heudiconv/cli/meta.py b/fw_heudiconv/cli/meta.py new file mode 100644 index 0000000..e705692 --- /dev/null +++ b/fw_heudiconv/cli/meta.py @@ -0,0 +1,347 @@ +import os +import sys +import argparse +import warnings +import flywheel +import logging +import re +import pandas as pd +import shutil +from pathlib import Path +from ..convert import get_nested + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('fw-heudiconv-curator') + + +def get_BIDS_label_from_session(ses_object, regex='sub'): + + logger.debug("Processing session {}...".format(ses_object.label)) + acquisitions = ses_object.acquisitions() + pattern = re.compile(r"(?<={}-)[a-zA-z0-9]+(?=_)".format(regex)) + + bids_labels = [] + for x in acquisitions: + files = [f for f in x.files if "nifti" in f.type] + for y in files: + bids_labels.append(get_nested(y.to_dict(), 'info', 'BIDS', 'Filename')) + if bids_labels: + for b in range(len(bids_labels)): + if bids_labels[b] is not None: + label = pattern.search(bids_labels[b]) + if label: + bids_labels[b] = label.group() + else: + bids_labels[b] = None + + 
final_label = set(filter(None, bids_labels)) + if len(final_label) == 1: + return final_label.pop() + else: + return None + + +def initialise_dataset(client, project_label, subject_labels=None, session_labels=None, dry_run=True): + + if dry_run: + logger.setLevel(logging.DEBUG) + logger.info("Querying Flywheel server...") + project_obj = client.projects.find_first('label="{}"'.format(project_label)) + assert project_obj, "Project not found! Maybe check spelling...?" + logger.debug('Found project: %s (%s)', project_obj['label'], project_obj.id) + sessions = client.get_project_sessions(project_obj.id) + # filters + if subject_labels: + sessions = [s for s in sessions if s.subject['label'] in subject_labels] + if session_labels: + sessions = [s for s in sessions if s.label in session_labels] + + return sessions + + +def attach_to_object(object, file, dry_run): + + if dry_run: + logger.info("{}\t-->\t{}".format(file.ljust(20), object.label)) + return 0 + my_file = Path(file) + if my_file.is_file(): + object.upload_file(my_file) + return 0 + else: + logger.error("Couldn't access file {}".format(file)) + return 1 + + +def autogen_participants_meta(project_obj, sessions, dry_run): + + participants = [] + for sess in sessions: + + participants.append({ + 'participant_id': get_BIDS_label_from_session(sess), + 'flywheel_id': sess.subject.label + }) + df = pd.DataFrame(participants) + df = df[['participant_id', 'flywheel_id']] + + tmpdir = "./tmp" + if not os.path.exists(tmpdir): + os.makedirs(tmpdir) + df.to_csv(tmpdir+"/participants.tsv", index=False, sep="\t", na_rep="n/a") + result = attach_to_object(project_obj, tmpdir+"/participants.tsv", dry_run) + shutil.rmtree(tmpdir) + return result + else: + logger.error("Couldn't create temp space to create .tsv files") + return 1 + + +def autogen_sessions_meta(client, sessions, dry_run): + + results = [] + subjects = {} + for sess in sessions: + sub = sess.subject.label + if sub in subjects: + subjects[sub].append(sess) + else: + 
subjects[sub] = [sess] + + tmpdir = "./tmp" + + for k, v in subjects.items(): + + subject_label = get_BIDS_label_from_session(v[0], 'sub') + if subject_label is None: + logger.error("Subject {} has no BIDS session data".format(k)) + continue + else: + sessions_dict = { + 'session_id': [get_BIDS_label_from_session(sess, 'ses') for sess in v], + 'flywheel_id': [sess.label for sess in v] + } + + df = pd.DataFrame(sessions_dict) + + if not os.path.exists(tmpdir): + os.makedirs(tmpdir) + df.to_csv(tmpdir+"/sub-{}_sessions.tsv".format(subject_label), index=False, sep="\t", na_rep="n/a") + subject_object = client.get(v[0].subject.id) + results.append(attach_to_object(subject_object, tmpdir+"/sub-{}_sessions.tsv".format(subject_label), dry_run)) + shutil.rmtree(tmpdir) + else: + logger.error("Couldn't create temp space to create .tsv files") + + if any([x == 1 for x in results]): + return 1 + else: + return 0 + + +def upload_to_session(client, sessions, label, infile, dry_run): + + subjects = {} + for sess in sessions: + sub = sess.subject.label + if sub in subjects: + subjects[sub].append(sess) + else: + subjects[sub] = [sess] + + tmpdir = "./tmp" + + if label in subjects: + target_sess = subjects[label][0] + subject_object = client.get(target_sess.subject.id) + result = attach_to_object(subject_object, infile, dry_run) + return result + else: + logger.error("Given subject label {} not found!".format(label)) + return 1 + + +def get_parser(): + + parser = argparse.ArgumentParser( + description="Curate BIDS metadata on Flywheel\n\nSee the BIDS spec for details: https://bids-specification.readthedocs.io/en/stable/03-modality-agnostic-files.html", + formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument( + "--project", + help="The project in flywheel", + required=True + ) + parser.add_argument( + "--subject", + help="The subject label(s)", + nargs="+", + default=None + ) + parser.add_argument( + "--session", + help="The session label(s)", + nargs="+", + 
default=None + ) + parser.add_argument( + "--verbose", + help="Print ongoing messages of progress", + action='store_true', + default=False + ) + parser.add_argument( + "--dry-run", + help="Don't apply changes", + action='store_true', + default=False + ) + + # participants metadata + participants_meta = parser.add_mutually_exclusive_group() + participants_meta.add_argument( + "--autogen-participants-meta", + help="Automatically generate participants.tsv metadata", + action='store_true', + default=False + ) + participants_meta.add_argument( + "--upload-participants-meta", + help="Path to a participants.tsv metadata file to upload", + action='store' + ) + + # sessions metadata + sessions_meta = parser.add_mutually_exclusive_group() + sessions_meta.add_argument( + "--autogen-sessions-meta", + help="Automatically generate sub-