From c77fc231b2078cc3ab802b6d67c4447f7c304c36 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 27 Aug 2024 00:30:42 -0400 Subject: [PATCH] add aggregate rule --- resources/qc/README.md | 11 +-- workflow/Snakefile | 1 + workflow/rules/common.smk | 50 ++++++++++--- workflow/rules/qc.smk | 97 ++++++++++++++----------- workflow/scripts/generate_subject_qc.py | 17 ----- 5 files changed, 98 insertions(+), 78 deletions(-) diff --git a/resources/qc/README.md b/resources/qc/README.md index 400b91e..a77594a 100644 --- a/resources/qc/README.md +++ b/resources/qc/README.md @@ -2,18 +2,13 @@ ### 1: Run the workflow with personalized report generation configurations -### 2: Navigate to qc directory: - -```bash -cd ./qc -``` -### 3: Create a Python web server: +### 2: Navigate to qc directory, then create a Python web server: ```bash python -m http.server ``` -### 4: Open the link generated in a browser and open qc_report.html +### 3: Open the link generated in a browser and open `qc_report.html` ### Note: - Can view the whole slice images and the flatfield corrections without running the web server. However, to view the volume rendered brain, the web server is required. \ No newline at end of file + Can view the whole slice images and the flatfield corrections without running the web server. However, to view the volume rendered brain, the web server is required. diff --git a/workflow/Snakefile b/workflow/Snakefile index 85ecad7..155244d 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -33,6 +33,7 @@ rule all: input: get_all_targets(), get_bids_toplevel_targets(), + get_qc_targets(), localrule: True diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 0e6c1a8..13e6c6f 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -29,6 +29,20 @@ def expand_bids(expand_kwargs, **bids_kwargs): return files +def remote_file(filename): + if is_remote(filename): + return storage(str(filename)) + else: + return filename + + +def remote_directory(dirname): + if is_remote(dirname): + return storage(directory(str(dirname))) + else: + return directory(dirname) + + def directory_bids(root, *args, **kwargs): """Similar to expand_bids, this replacement function is needed to ensure storage() comes after directory() tags""" @@ -107,21 +121,25 @@ def get_all_targets(): ), ) ) - if config["report"]["create_report"]: - targets.extend( - expand( - Path(root) - / "qc" - / "sub-{subject}_sample-{sample}_acq-{acq}" - / "subject.html", - subject=datasets.loc[i, "subject"], - sample=datasets.loc[i, "sample"], - acq=datasets.loc[i, "acq"], - ) - ) return targets +def get_all_subj_html(wildcards): + htmls = [] + + for i in range(len(datasets)): + + html = "{root}/qc/sub-{subject}_sample-{sample}_acq-{acq}/subject.html".format( + root=root, + subject=datasets.loc[i, "subject"], + sample=datasets.loc[i, "sample"], + acq=datasets.loc[i, "acq"], + ) + htmls.append(remote_file(html)) + + return htmls + + def get_bids_toplevel_targets(): targets = [] targets.append(bids_toplevel(root, "README.md")) @@ -132,6 +150,14 @@ def get_bids_toplevel_targets(): return targets +def get_qc_targets(): + targets = [] + if config["report"]["create_report"]: + targets.append(remote_file(Path(root) / "qc" / "qc_report.html")) + targets.append(remote_file(Path(root) / "qc" / "README.md")) + return targets + + def get_input_dataset(wildcards): """returns path to extracted dataset or path to provided input folder""" in_dataset = get_dataset_path(wildcards) diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 82b2f59..cef0cf8 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -4,7 +4,7 @@ rule setup_qc_dir: input: readme_md=config["report"]["resources"]["readme_md"], output: - readme_md=str(Path(root) / "qc" / "README.md"), + readme_md=remote_file(Path(root) / "qc" / "README.md"), log: "logs/setup_qc_dir_log.txt", shell: @@ -38,26 +38,24 @@ rule generate_flatfield_qc: ff_s_step=config["report"]["flatfield_corrected"]["slice_step"], ff_cmap=config["report"]["flatfield_corrected"]["colour_map"], output: - html=Path(root) - / "qc" - / "sub-{subject}_sample-{sample}_acq-{acq}/flatfieldqc.html", - corr_images_dir=directory( - str( - Path(root) - / "qc" - / "sub-{subject}_sample-{sample}_acq-{acq}" - / "images" - / "corr" - ) + html=remote_file( + Path(root) + / "qc" + / "sub-{subject}_sample-{sample}_acq-{acq}/flatfieldqc.html" ), - uncorr_images_dir=directory( - str( - Path(root) - / "qc" - / "sub-{subject}_sample-{sample}_acq-{acq}" - / "images" - / "uncorr" - ) + corr_images_dir=remote_directory( + Path(root) + / "qc" + / "sub-{subject}_sample-{sample}_acq-{acq}" + / "images" + / "corr" + ), + uncorr_images_dir=remote_directory( + Path(root) + / "qc" + / "sub-{subject}_sample-{sample}_acq-{acq}" + / "images" + / "uncorr" ), log: bids( @@ -85,20 +83,18 @@ rule generate_whole_slice_qc: uri=get_output_ome_zarr_uri(), storage_provider_settings=workflow.storage_provider_settings, output: - html=str( + html=remote_file( Path(root) / "qc" / "sub-{subject}_sample-{sample}_acq-{acq}" / "whole_slice_qc.html" ), - images_dir=directory( - str( - Path(root) - / "qc" - / "sub-{subject}_sample-{sample}_acq-{acq}" - / "images" - / "whole" - ) + images_dir=remote_directory( + Path(root) + / "qc" + / "sub-{subject}_sample-{sample}_acq-{acq}" + / "images" + / "whole" ), log: bids( @@ -123,16 +119,18 @@ rule generate_volume_qc: uri=get_output_ome_zarr_uri(), storage_provider_settings=workflow.storage_provider_settings, output: - resources=directory( + resources=remote_directory( Path(root) / "qc" / "sub-{subject}_sample-{sample}_acq-{acq}" / "volume_resources" ), - html=Path(root) - / "qc" - / "sub-{subject}_sample-{sample}_acq-{acq}" - / "volume_qc.html", + html=remote_file( + Path(root) + / "qc" + / "sub-{subject}_sample-{sample}_acq-{acq}" + / "volume_qc.html" + ), log: bids( root="logs", @@ -150,17 +148,16 @@ rule generate_subject_qc: "Generates html files to access all the subjects qc reports in one place" input: subject_html=config["report"]["resources"]["subject_html"], - report_html=config["report"]["resources"]["report_html"], ws_html=rules.generate_whole_slice_qc.output.html, ff_html=rules.generate_flatfield_qc.output.html, vol_html=rules.generate_volume_qc.output.html, - params: - total_html=str(Path(root) / "qc" / "qc_report.html"), #this should really be an output of a rule that expands over subjects output: - sub_html=Path(root) - / "qc" - / "sub-{subject}_sample-{sample}_acq-{acq}" - / "subject.html", + sub_html=remote_file( + Path(root) + / "qc" + / "sub-{subject}_sample-{sample}_acq-{acq}" + / "subject.html" + ), log: bids( root="logs", @@ -172,3 +169,21 @@ rule generate_subject_qc: ), script: "../scripts/generate_subject_qc.py" + + +rule generate_aggregate_qc: + input: + report_html=config["report"]["resources"]["report_html"], + subj_htmls=get_all_subj_html, + params: + datasets=datasets, + output: + total_html=remote_file(Path(root) / "qc" / "qc_report.html"), + log: + bids( + root="logs", + datatype="generate_aggregate_qc", + suffix="log.txt", + ), + script: + "../scripts/generate_aggregate_qc.py" diff --git a/workflow/scripts/generate_subject_qc.py b/workflow/scripts/generate_subject_qc.py index 51d2a85..6c75542 100644 --- a/workflow/scripts/generate_subject_qc.py +++ b/workflow/scripts/generate_subject_qc.py @@ -14,7 +14,6 @@ # output html files sub_html = snakemake.output.sub_html -total_html = snakemake.params.total_html # Wildcards subject = snakemake.wildcards.subject @@ -33,19 +32,3 @@ with open(sub_html, 'w') as f: f.write(output) -relative_path = Path(sub_html).relative_to(Path(snakemake.params.total_html).parent) -# Create line to add link to subject into final qc report combining all subjects -sub_link = f'\n\t\t{subject}-{sample}-{acq}
' - -# if not first sample just add the one link -if(path.exists(total_html)): - with open(total_html,'a') as f: - f.write(sub_link) - -# if it is the first sample write out the template -else: - template = env.get_template(snakemake.input.report_html) - output = template.render() - output+=sub_link - with open(total_html, 'w') as f: - f.write(output)