diff --git a/README.md b/README.md
index 7ba1275..e5d5b8d 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@
 or for snakemake<8.0, use:
 snakemake -c all --use-singularity
 ```
 
-Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher:mem_mb=60000 fuse_dataset:mem_mb=100000`
+Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000`
 
 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/
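The rule names in the updated note correspond to the Spark-based rules renamed later in this patch (workflow/rules/bigstitcher.smk). Combined with the run command above, a full invocation would look like the following sketch (the 60000/100000 values are the note's examples, not requirements):

```bash
# raise the Java heap for the Spark stitching and fusion rules
snakemake -c all --use-singularity \
  --set-resources bigstitcher_spark_stitching:mem_mb=60000 \
                  bigstitcher_spark_fusion:mem_mb=100000
```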
diff --git a/config/config.yml b/config/config.yml
index 6a5c684..7fa2b4c 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -6,7 +6,7 @@
 work: 'work'
 remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container
 
-write_ome_zarr_direct: False #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk
+write_ome_zarr_direct: True #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk
 
 cores_per_rule: 32
 
@@ -36,18 +36,18 @@ bigstitcher:
     downsample_in_x: 4
     downsample_in_y: 4
     downsample_in_z: 1
-  method: "phase_corr"
-  methods:
-    phase_corr: "Phase Correlation"
-    optical_flow: "Lucas-Kanade"
+  method: "phase_corr" #unused
+  methods: #unused
+    phase_corr: "Phase Correlation"
+    optical_flow: "Lucas-Kanade"
   filter_pairwise_shifts:
-    enabled: 1
-    min_r: 0.2
-    max_shift_total: 50
+    enabled: 1 #unused
+    min_r: 0.7
+    max_shift_total: 50
   global_optimization:
     enabled: 1
-    method: ONE_ROUND_SIMPLE
-    methods:
+    method: TWO_ROUND_ITERATIVE
+    methods: #unused, only for reference
       ONE_ROUND_SIMPLE: "One-Round"
       ONE_ROUND_ITERATIVE: "One-Round with iterative dropping of bad links"
       TWO_ROUND_SIMPLE: "Two-Round using metadata to align unconnected Tiles"
@@ -64,7 +64,7 @@ bigstitcher:
     block_size_factor_z: 32
 
 ome_zarr:
-  desc: sparkstitchedflatcorr
+  desc: sparkstitchedraw #flatcorr
   max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
   rechunk_size: #z, y, x
     - 1
@@ -100,7 +100,7 @@ ome_zarr:
       id: 0
       name: spim
       version: "0.4"
-  use_zipstore: True #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr
+  use_zipstore: False #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr
 
 nifti:
   levels: #cannot be higher than max_downsampling_layers in ome_zarr
@@ -155,7 +155,5 @@ report:
 
 containers:
-  #spimprep: 'docker://khanlab/spimprep-deps:main'
-  spimprep: '/local/scratch/spimprep-deps_spark.sif'
-  # updated.sif'
+  spimprep: 'docker://khanlab/spimprep-deps:main'
 
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 14abdd6..cd19ab3 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -33,7 +33,7 @@ rule all:
     input:
         get_all_targets(),
         get_bids_toplevel_targets(),
-        # get_qc_targets(), #need to skip this if using prestitched
+        get_qc_targets(),  #need to skip this if using prestitched
     localrule: True
 
 
diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk
index ebd2d49..4256e3b 100644
--- a/workflow/rules/bigstitcher.smk
+++ b/workflow/rules/bigstitcher.smk
@@ -93,7 +93,7 @@ rule zarr_to_bdv:
     script:
         "../scripts/zarr_to_n5_bdv.py"
 
-"""
+
 rule bigstitcher:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
@@ -112,7 +112,7 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcherproc.sh",
+                suffix="bigstitcherfiji.sh",
             )
         ),
         dataset_xml=temp(
@@ -123,13 +123,13 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcher.xml",
+                suffix="bigstitcherfiji.xml",
             )
         ),
     benchmark:
         bids(
             root="benchmarks",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -139,7 +139,7 @@ rule bigstitcher:
     log:
         bids(
             root="logs",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -159,19 +159,30 @@ rule bigstitcher:
         " {params.fiji_launcher_cmd} && "
         " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} "
         " && {output.launcher} |& tee {log} && {params.rm_old_xml}"
-"""
+
 
 rule bigstitcher_spark_stitching:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
        dataset_xml=rules.zarr_to_bdv.output.bdv_xml,
     params:
-        downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'],
-            dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'],
-            dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']),
-        min_r='--minR={min_r}'.format(min_r=config['bigstitcher']['filter_pairwise_shifts']['min_r']),
-        max_shift='--maxShiftTotal={max_shift}'.format(max_shift=config['bigstitcher']['filter_pairwise_shifts']['max_shift_total']),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        min_r="--minR={min_r}".format(
+            min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"]
+        ),
+        max_shift="--maxShiftTotal={max_shift}".format(
+            max_shift=config["bigstitcher"]["filter_pairwise_shifts"][
+                "max_shift_total"
+            ]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
     output:
         dataset_xml=temp(
             bids(
@@ -215,7 +226,8 @@ rule bigstitcher_spark_stitching:
     shell:
         "cp {input.dataset_xml} {output.dataset_xml} && "
         "stitching {params.mem_gb} {threads} -x {output.dataset_xml} "
-        " {params.min_r} {params.downsampling} "
+        " {params.min_r} {params.downsampling} {params.max_shift} && "
+        "{params.rm_old_xml}"
 
 
 rule bigstitcher_spark_solver:
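The stitching rule above now forwards the pairwise-shift filter settings from config/config.yml to the BigStitcher-Spark `stitching` CLI, and `rm_old_xml` appears to clean up the single-digit XML backups (dataset.xml~1, dataset.xml~2, ...) the tool leaves next to the XML it rewrites. A minimal sketch of how those params expand into the final shell command, using the config values set earlier in this patch (60000 mem_mb and 32 threads are illustrative):

```python
# Mirror of the rule's params: section, with config values inlined
config = {
    "bigstitcher": {
        "calc_pairwise_shifts": {
            "downsample_in_x": 4,
            "downsample_in_y": 4,
            "downsample_in_z": 1,
        },
        "filter_pairwise_shifts": {"min_r": 0.7, "max_shift_total": 50},
    }
}

shifts = config["bigstitcher"]["calc_pairwise_shifts"]
filt = config["bigstitcher"]["filter_pairwise_shifts"]

downsampling = "--downsampling={dsx},{dsy},{dsz}".format(
    dsx=shifts["downsample_in_x"],
    dsy=shifts["downsample_in_y"],
    dsz=shifts["downsample_in_z"],
)
min_r = "--minR={}".format(filt["min_r"])
max_shift = "--maxShiftTotal={}".format(filt["max_shift_total"])
mem_gb = int(60000 / 1000)  # resources.mem_mb -> whole GB for the JVM heap
threads = 32  # cores_per_rule

print(f"stitching {mem_gb} {threads} -x dataset.xml {min_r} {downsampling} {max_shift}")
# stitching 60 32 -x dataset.xml --minR=0.7 --downsampling=4,4,1 --maxShiftTotal=50
```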
@@ -223,11 +235,18 @@ rule bigstitcher_spark_solver:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
         dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml,
     params:
-        downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'],
-            dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'],
-            dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']),
-        method='--method={method}'.format(method=config['bigstitcher']['global_optimization']['method']),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        method="--method={method}".format(
+            method=config["bigstitcher"]["global_optimization"]["method"]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
     output:
         dataset_xml=temp(
             bids(
@@ -271,12 +290,13 @@ rule bigstitcher_spark_solver:
     shell:
         "cp {input.dataset_xml} {output.dataset_xml} && "
         "solver {params.mem_gb} {threads} -x {output.dataset_xml} "
-        " -s STITCHING --lambda 0.1 " #lambda 0.1 is default (can expose this if needed)
-        " {params.method} "
-
+        " -s STITCHING --lambda 0.1 "
+        " {params.method} && "
+        "{params.rm_old_xml}"
+    #lambda 0.1 is default (can expose this if needed)
 
 
-rule fuse_dataset:
+rule bigstitcher_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -294,7 +314,11 @@ rule fuse_dataset:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'),
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
@@ -363,7 +387,7 @@ rule fuse_dataset:
         " && {output.launcher} |& tee {log}"
 
 
-rule fuse_dataset_spark:
+rule bigstitcher_spark_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -381,7 +405,11 @@ rule fuse_dataset_spark:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'),
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
@@ -398,7 +426,9 @@ rule fuse_dataset_spark:
             bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"],
             bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"],
         ),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
     output:
         zarr=temp(
             directory(
@@ -440,7 +470,7 @@ rule fuse_dataset_spark:
         config["containers"]["spimprep"]
     resources:
        runtime=30,
-        mem_mb=40000,
+        mem_mb=30000,
     threads: config["cores_per_rule"]
     group:
         "preproc"
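Both renamed fusion rules pick their input XML with the same inline conditional on bigstitcher.global_optimization.enabled. Reduced to plain Python with the value set in config/config.yml above, the selection is:

```python
# enabled: 1 in this patch, so fusion consumes the solver's globally-optimized XML
config = {"bigstitcher": {"global_optimization": {"enabled": 1}}}

suffix = "bigstitcher{}.xml".format(
    "solver"
    if config["bigstitcher"]["global_optimization"]["enabled"]
    else "stitching"
)
print(suffix)  # -> bigstitchersolver.xml (bigstitcherstitching.xml when disabled)
```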
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 0248dd3..88d40e7 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -368,13 +368,6 @@ def get_output_ome_zarr(acq_type):
     }
 
 
-def get_output_ome_zarr_as_input(wildcards):
-    if 'blaze' in wildcards.acq:
-        return get_output_ome_zarr('blaze')
-    elif 'prestitched' in wildcards.acq:
-        return get_output_ome_zarr('prestitched')
-
-
 def get_storage_creds():
     """for rules that deal with remote storage directly"""
     protocol = Path(config["root"]).protocol
diff --git a/workflow/rules/ome_zarr.smk b/workflow/rules/ome_zarr.smk
index 77a58b3..0ea5854 100644
--- a/workflow/rules/ome_zarr.smk
+++ b/workflow/rules/ome_zarr.smk
@@ -117,14 +117,15 @@ rule ome_zarr_to_zipstore:
 
 rule ome_zarr_to_nii:
     input:
-        zarr=bids(
-            root=root,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr())),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
     params:
         channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain),
         uri=get_output_ome_zarr_uri(),
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index a79289f..fc32d7b 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -73,8 +73,15 @@ rule generate_flatfield_qc:
 rule generate_whole_slice_qc:
     "Generates an html file to view whole slices from preprocessed images"
     input:
-        unpack(get_output_ome_zarr_as_input),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
         ws_html=config["report"]["resources"]["ws_html"],
     params:
         ws_s_start=config["report"]["whole_slice_viewer"]["slice_start"],
@@ -112,8 +119,15 @@ rule generate_volume_qc:
 rule generate_volume_qc:
     "Generates an html file to view the volume rendered image"
     input:
-        unpack(get_output_ome_zarr_as_input),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
         vol_viewer_dir=config["report"]["resources"]["vol_viewer_dir"],
     params:
         uri=get_output_ome_zarr_uri(),
diff --git a/workflow/scripts/generate_volume_qc.py b/workflow/scripts/generate_volume_qc.py
index d5c48d1..12258ff 100644
--- a/workflow/scripts/generate_volume_qc.py
+++ b/workflow/scripts/generate_volume_qc.py
@@ -14,8 +14,6 @@
 # where html file should be written
 html_dest = snakemake.output.html
 
-# inputted ome-zarr path
-ome_data = snakemake.input.zarr
 
 # move volume renderer into the subjects directory
 copy_tree(snakemake.input.vol_viewer_dir, resource_dir)
@@ -28,7 +26,11 @@
     fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
 
 darr = da.from_zarr(store,component='/5')
 # Get most downsampled ome-zarr image
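The same ZipStore/FSStore dispatch now appears in generate_volume_qc.py (above) and, below, in generate_whole_slice_qc.py and ome_zarr_to_nii.py; it is what lets the scripts read either SPIM.ome.zarr or the zipped SPIM.ome.zarr.zip that use_zipstore produces. Factored out, the pattern is roughly this sketch (zarr v2 API; in the scripts, `Path` is a URI-aware path type whose `.path` strips the protocol, and `fs` comes from the workflow's `get_fsspec` helper):

```python
import zarr
from pathlib import PurePosixPath

# `path` is the protocol-stripped path the scripts obtain from Path(uri).path;
# `fs` is the fsspec filesystem returned by get_fsspec for the URI.
def open_spim_store(path, fs=None):
    if PurePosixPath(path).suffix == '.zip':
        # zipped OME-Zarr (use_zipstore: True): read through a ZipStore
        return zarr.storage.ZipStore(path, dimension_separator='/', mode='r')
    # plain OME-Zarr directory: fsspec-backed store, works locally and remotely
    return zarr.storage.FSStore(path, fs=fs, dimension_separator='/', mode='r')
```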
diff --git a/workflow/scripts/generate_whole_slice_qc.py b/workflow/scripts/generate_whole_slice_qc.py
index f721128..ae072d8 100644
--- a/workflow/scripts/generate_whole_slice_qc.py
+++ b/workflow/scripts/generate_whole_slice_qc.py
@@ -1,7 +1,6 @@
 import os
 import math
-from ome_zarr.io import parse_url
-from ome_zarr.reader import Reader
+import dask.array as da
 import matplotlib.pyplot as plt
 import numpy as np
 from jinja2 import Environment, FileSystemLoader
@@ -19,14 +18,10 @@
 ws_s_start=snakemake.params.ws_s_start
 ws_cmap=snakemake.params.ws_cmap
 
-# input ome-zarr file
-ome= snakemake.input.zarr
-
 # output paths
 image_dir = snakemake.output.images_dir
 out_html = snakemake.output.html
 
-from ome_zarr.io import ZarrLocation
 uri = snakemake.params.uri
 if is_remote(uri):
@@ -35,18 +30,19 @@
     fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
-zarrloc = ZarrLocation(store)
 
-# read ome-zarr data and convert to list
-proc_reader= Reader(zarrloc)
-proc_data=list(proc_reader())[0].data
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+darr = da.from_zarr(store,component='/5')
 
 os.makedirs(image_dir, exist_ok=True)
 
 channels = []
 
 # for each channel add the images of the most downsampled data
-for chan_num, channel in enumerate(proc_data[-1]):
+for chan_num, channel in enumerate(darr):
     slice = ws_s_start
     chan = []
     slices = []
diff --git a/workflow/scripts/ome_zarr_to_nii.py b/workflow/scripts/ome_zarr_to_nii.py
index 720f9b4..728e1c8 100644
--- a/workflow/scripts/ome_zarr_to_nii.py
+++ b/workflow/scripts/ome_zarr_to_nii.py
@@ -19,18 +19,11 @@
 
 fs = get_fsspec(uri,**fs_args)
 
-print(fs)
-print(uri)
-print(Path(uri))
-print(Path(uri).path)
-
-print(Path(uri).suffix)
 if Path(uri).suffix == '.zip':
     store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
 else:
     store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
 
-print(store)
 zi = zarr.open(store=store,mode='r')
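With the ome_zarr Reader gone, generate_whole_slice_qc.py reads one pyramid level directly through dask; component '/5' is the most downsampled level given max_downsampling_layers: 5 in the config. A self-contained sketch of the new access pattern for a local, unzipped store (the c, z, y, x axis order is assumed from how the script iterates channels and z-slices):

```python
import dask.array as da
import zarr

# local example; the scripts build `store` via the ZipStore/FSStore dispatch above
store = zarr.storage.FSStore('SPIM.ome.zarr', dimension_separator='/', mode='r')

darr = da.from_zarr(store, component='/5')  # most downsampled pyramid level

for chan_num, channel in enumerate(darr):   # axis 0: channels
    plane = channel[channel.shape[0] // 2]  # axis 1: z; take the middle slice
    img = plane.compute()                   # small 2D numpy array, ready to plot
```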