From 2cec8cb123b54b6b53250d25e8c8d64dec98ae4f Mon Sep 17 00:00:00 2001 From: "William F. Broderick" Date: Tue, 16 Jan 2018 16:47:44 -0500 Subject: [PATCH] Gets snakemake working better with cluster This commit makes some changes to make the Snakefile work better with the cluster: - resources are now set in the rules and then used in the cluster config - we now have plugin_args working with the prisma_preproc script, allowing us to make use of the MultiProc plugin on the cluster - starts GLMdenoise rule - cluster config file makes sure to inherit output, error, job name, mem and cpus_per_task from the rule itself --- Snakefile | 31 +++++++++++++++++++------------ cluster.json | 15 +++++++-------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/Snakefile b/Snakefile index a531ea9..693e7e6 100644 --- a/Snakefile +++ b/Snakefile @@ -1,5 +1,5 @@ import os -# the directory that contains the data (in BIDS format) + configfile: "config.yml" if not os.path.isdir(config["DATA_DIR"]): @@ -33,19 +33,16 @@ rule stimuli_idx: shell: "python sfp/stimuli.py {wildcards.subject} -i -s {params.seed}" -# I was thinking I should do something like this -# https://groups.google.com/forum/#!topic/snakemake/e0XNmXqL7Bg in order to be able to run things -# both on and off cluster, but now I'm thinking maybe I make the things that should be run on the -# cluster always call `module` so they fail if run locally. that would work for me at any rate. - -# this has to be run on the cluster, otherwise it will fail rule preprocess: input: os.path.join(config["DATA_DIR"], "derivatives", "freesurfer", "{subject}"), data_dir = os.path.join(config["DATA_DIR"], "{subject}", "{session}"), freesurfer_dir = os.path.join(config["DATA_DIR"], "derivatives", "freesurfer"), output: - output_dir = os.path.join(config["DATA_DIR"], "derivatives", "preprocessed", "{subject}", "{session}"), + output_dir = os.path.join(config["DATA_DIR"], "derivatives", "preprocessed", "{subject}", "{session}") + resources: + cpus_per_task = 10, + mem = 48 params: sbref = 1, epis = lambda wildcards: @@ -55,9 +52,10 @@ rule preprocess: ('sub-wlsubj045', 'ses-pilot01'): [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]}.get((wildcards.subject, wildcards.session)), distortPE = 'PA', distortrevPE = 'AP', - plugin = "Linear", - working_dir = "/scratch/wfb229", - PEdim = 'y' + plugin = "MultiProc", + working_dir = lambda wildcards: "/scratch/wfb229/preproc_%s_%s" % (wildcards.subject, wildcards.session), + PEdim = 'y', + plugin_args = lambda wildcards, resources: ",".join("%s:%s" % (k,v) for k,v in {'n_procs': resources.cpus_per_task, 'memory_gb': resources.mem}.items()) benchmark: os.path.join(config["DATA_DIR"], "code", "preprocessed", "{subject}_{session}_benchmark.txt") log: @@ -67,4 +65,13 @@ rule preprocess: "python ~/MRI_tools/preprocessing/prisma_preproc.py -subject {wildcards.subject} -datadir " "{input.data_dir} -outdir {output.output_dir} -epis {params.epis} -sbref {params.sbref} " "-distortPE {params.distortPE} -distortrevPE {params.distortrevPE} -working_dir " - "{params.working_dir} -PEdim {params.PEdim} -plugin {params.plugin} -dir_structure bids" + "{params.working_dir} -PEdim {params.PEdim} -plugin {params.plugin} -dir_structure bids " + "-plugin_args {params.plugin_args}" + +rule GLMdenoise: + input: + os.path.join(config["DATA_DIR"], "derivatives", "preprocessed", "{subject}", "{session}"), + GLMdenoise_path = os.path.join(os.path.expanduser('~'), 'matlab-toolboxes', 'GLMdenoise') + resources: + cpus_per_task = 8, + mem = 62 diff --git a/cluster.json b/cluster.json index d59a957..4173a28 100644 --- a/cluster.json +++ b/cluster.json @@ -3,14 +3,13 @@ { "nodes": 1, "tasks_per_node": 1, - "cpus_per_task": 1, "mem": "48GB", - "time": "12:00:00" - }, - - "preprocess": - { - "job_name": "preproc", - "mem": "10GB" + "time": "12:00:00", + "job_name": "{rule}.{wildcards}", + "cpus_per_task": 1, + "output": "{log}", + "error": "{log}", + "mem": "{resources.mem}GB", + "cpus_per_task": "{resources.cpus_per_task}" } }