diff --git a/README.md b/README.md
index 7ba1275..e5d5b8d 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@
 or for snakemake<8.0, use:
 ```
 snakemake -c all --use-singularity
 ```
 
-Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher:mem_mb=60000 fuse_dataset:mem_mb=100000`
+Note: if you run the workflow on a system with a large amount of memory, you will need to set the heap size for the stitching and fusion rules. This can be done with, e.g., `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000`
 
 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/
diff --git a/config/config.yml b/config/config.yml
index 3d6e0a7..edc16ba 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -36,34 +36,35 @@ bigstitcher:
     downsample_in_x: 4
     downsample_in_y: 4
     downsample_in_z: 1
-    method: "phase_corr"
-    methods:
-      phase_corr: "Phase Correlation"
-      optical_flow: "Lucas-Kanade"
+    method: "phase_corr" #unused
+    methods: #unused
+      phase_corr: "Phase Correlation"
+      optical_flow: "Lucas-Kanade"
   filter_pairwise_shifts:
-    enabled: 1
+    enabled: 1 #unused
     min_r: 0.7
+    max_shift_total: 50
   global_optimization:
-    enabled: 1
-    strategy: two_round
-    strategies:
-      one_round: "One-Round"
-      one_round_iterative: "One-Round with iterative dropping of bad links"
-      two_round: "Two-Round using metadata to align unconnected Tiles"
-      two_round_iterative: "Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links"
+    enabled: 1
+    method: TWO_ROUND_ITERATIVE
+    methods: #unused, only for reference
+      ONE_ROUND_SIMPLE: "One-Round"
+      ONE_ROUND_ITERATIVE: "One-Round with iterative dropping of bad links"
+      TWO_ROUND_SIMPLE: "Two-Round using metadata to align unconnected Tiles"
+      TWO_ROUND_ITERATIVE: "Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links"
 
   fuse_dataset:
     downsampling: 1
     block_size_x: 256 # for storage
     block_size_y: 256
-    block_size_z: 1
+    block_size_z: 8
     block_size_factor_x: 1 #e.g. 2 will use 2*block_size for computation
     block_size_factor_y: 1
-    block_size_factor_z: 256
+    block_size_factor_z: 32
 
 ome_zarr:
-  desc: stitchedflatcorr
+  desc: sparkstitchedflatcorr
   max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
   rechunk_size: #z, y, x
     - 1
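Note: the `block_size_*` values are the chunks written to storage, while `block_size_factor_*` scales them up for computation (per the config comment above), so the new settings keep the compute block cubic. A quick sanity check of that arithmetic in plain Python (values copied from the config; illustrative only, not workflow code):

```python
# block_size_* are the chunks written to storage; per the config comment,
# computation uses block_size * block_size_factor voxels per task.
storage_block = {"x": 256, "y": 256, "z": 8}  # block_size_*
factor = {"x": 1, "y": 1, "z": 32}            # block_size_factor_*

compute_block = {ax: storage_block[ax] * factor[ax] for ax in "xyz"}
assert compute_block == {"x": 256, "y": 256, "z": 256}  # cubic compute blocks
```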
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 14abdd6..cd19ab3 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -33,7 +33,7 @@ rule all:
     input:
         get_all_targets(),
         get_bids_toplevel_targets(),
-        # get_qc_targets(), #need to skip this if using prestitched
+        get_qc_targets(), #need to skip this if using prestitched
     localrule: True
 
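Note: the new spark rules below derive the JVM heap size from Snakemake's `mem_mb` resource, which is why the README example now targets `bigstitcher_spark_stitching` and `bigstitcher_spark_fusion`. A minimal sketch of that conversion (standalone Python; `resources` here is a stand-in for the object Snakemake passes to the lambda):

```python
from types import SimpleNamespace

# e.g. snakemake ... --set-resources bigstitcher_spark_stitching:mem_mb=60000
resources = SimpleNamespace(mem_mb=60000)  # stand-in for Snakemake's resources object

# same expression as the mem_gb lambda used in the rules below
mem_gb = "{mem_gb}".format(mem_gb=int(resources.mem_mb / 1000))
print(mem_gb)  # '60', passed as the first argument of stitching/solver/affine-fusion
```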
diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk
index 7e3c0c4..4256e3b 100644
--- a/workflow/rules/bigstitcher.smk
+++ b/workflow/rules/bigstitcher.smk
@@ -112,7 +112,7 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcherproc.sh",
+                suffix="bigstitcherfiji.sh",
             )
         ),
         dataset_xml=temp(
@@ -123,13 +123,13 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcher.xml",
+                suffix="bigstitcherfiji.xml",
             )
         ),
     benchmark:
         bids(
             root="benchmarks",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -139,7 +139,7 @@ rule bigstitcher:
     log:
         bids(
             root="logs",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -161,7 +161,142 @@ rule bigstitcher:
         " && {output.launcher} |& tee {log} && {params.rm_old_xml}"
 
 
-rule fuse_dataset:
+rule bigstitcher_spark_stitching:
+    input:
+        dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
+        dataset_xml=rules.zarr_to_bdv.output.bdv_xml,
+    params:
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        min_r="--minR={min_r}".format(
+            min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"]
+        ),
+        max_shift="--maxShiftTotal={max_shift}".format(
+            max_shift=config["bigstitcher"]["filter_pairwise_shifts"][
+                "max_shift_total"
+            ]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
+    output:
+        dataset_xml=temp(
+            bids(
+                root=work,
+                subject="{subject}",
+                datatype="micr",
+                sample="{sample}",
+                acq="{acq}",
+                desc="{desc}",
+                suffix="bigstitcherstitching.xml",
+            )
+        ),
+    benchmark:
+        bids(
+            root="benchmarks",
+            datatype="bigstitcherstitching",
+            subject="{subject}",
+            sample="{sample}",
+            acq="{acq}",
+            desc="{desc}",
+            suffix="benchmark.tsv",
+        )
+    log:
+        bids(
+            root="logs",
+            datatype="bigstitcherstitching",
+            subject="{subject}",
+            sample="{sample}",
+            acq="{acq}",
+            desc="{desc}",
+            suffix="log.txt",
+        ),
+    container:
+        config["containers"]["spimprep"]
+    resources:
+        runtime=30,
+        mem_mb=40000,
+    threads: config["cores_per_rule"]
+    group:
+        "preproc"
+    shell:
+        "cp {input.dataset_xml} {output.dataset_xml} && "
+        "stitching {params.mem_gb} {threads} -x {output.dataset_xml} "
+        " {params.min_r} {params.downsampling} {params.max_shift} && "
+        "{params.rm_old_xml}"
+
+
+rule bigstitcher_spark_solver:
+    input:
+        dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
+        dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml,
+    params:
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        method="--method={method}".format(
+            method=config["bigstitcher"]["global_optimization"]["method"]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
+    output:
+        dataset_xml=temp(
+            bids(
+                root=work,
+                subject="{subject}",
+                datatype="micr",
+                sample="{sample}",
+                acq="{acq}",
+                desc="{desc}",
+                suffix="bigstitchersolver.xml",
+            )
+        ),
+    benchmark:
+        bids(
+            root="benchmarks",
+            datatype="bigstitchersolver",
+            subject="{subject}",
+            sample="{sample}",
+            acq="{acq}",
+            desc="{desc}",
+            suffix="benchmark.tsv",
+        )
+    log:
+        bids(
+            root="logs",
+            datatype="bigstitchersolver",
+            subject="{subject}",
+            sample="{sample}",
+            acq="{acq}",
+            desc="{desc}",
+            suffix="log.txt",
+        ),
+    container:
+        config["containers"]["spimprep"]
+    resources:
+        runtime=30,
+        mem_mb=40000,
+    threads: config["cores_per_rule"]
+    group:
+        "preproc"
+    shell:
+        "cp {input.dataset_xml} {output.dataset_xml} && "
+        "solver {params.mem_gb} {threads} -x {output.dataset_xml} "
+        " -s STITCHING --lambda 0.1 "
+        " {params.method} && "
+        "{params.rm_old_xml}"
+        #lambda 0.1 is default (can expose this if needed)
+
+
+rule bigstitcher_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -179,7 +314,11 @@ rule fuse_dataset:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher.xml",
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
@@ -238,7 +377,7 @@ rule fuse_dataset:
         config["containers"]["spimprep"]
     resources:
         runtime=30,
-        mem_mb=20000,
+        mem_mb=40000,
    threads: config["cores_per_rule"]
     group:
         "preproc"
@@ -248,7 +387,7 @@ rule fuse_dataset:
         " && {output.launcher} |& tee {log}"
 
 
-rule fuse_dataset_spark:
+rule bigstitcher_spark_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -266,9 +405,30 @@ rule fuse_dataset_spark:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher.xml",
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
+    params:
+        channel=lambda wildcards: "--channelId={channel}".format(
+            channel=get_stains(wildcards).index(wildcards.stain)
+        ),
+        block_size="--blockSize={bsx},{bsy},{bsz}".format(
+            bsx=config["bigstitcher"]["fuse_dataset"]["block_size_x"],
+            bsy=config["bigstitcher"]["fuse_dataset"]["block_size_y"],
+            bsz=config["bigstitcher"]["fuse_dataset"]["block_size_z"],
+        ),
+        block_size_factor="--blockScale={bsfx},{bsfy},{bsfz}".format(
+            bsfx=config["bigstitcher"]["fuse_dataset"]["block_size_factor_x"],
+            bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"],
+            bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"],
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
     output:
         zarr=temp(
             directory(
@@ -310,9 +470,12 @@ rule fuse_dataset_spark:
         config["containers"]["spimprep"]
     resources:
         runtime=30,
-        mem_mb=20000,
+        mem_mb=30000,
     threads: config["cores_per_rule"]
     group:
         "preproc"
     shell:
-        "affine-fusion ..."
+        "affine-fusion {params.mem_gb} {threads} --preserveAnisotropy -x {input.dataset_xml} "
+        " -o {output.zarr} -d /fused/s0 -s ZARR "
+        " --UINT16 --minIntensity 0 --maxIntensity 65535 "
+        "{params.block_size} {params.block_size_factor} {params.channel}"
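Note: both fusion rules now pick their input XML according to whether the global-optimization (solver) step is enabled. A minimal sketch of that selection, with the config stubbed down to the one key that matters (illustrative only):

```python
# mirrors the suffix expression used by bigstitcher_fusion and
# bigstitcher_spark_fusion above
config = {"bigstitcher": {"global_optimization": {"enabled": 1}}}

suffix = "bigstitcher{}.xml".format(
    "solver"
    if config["bigstitcher"]["global_optimization"]["enabled"]
    else "stitching"
)
assert suffix == "bigstitchersolver.xml"
# with enabled: 0 this would instead resolve to "bigstitcherstitching.xml"
```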
+ "affine-fusion {params.mem_gb} {threads} --preserveAnisotropy -x {input.dataset_xml} " + " -o {output.zarr} -d /fused/s0 -s ZARR " + " --UINT16 --minIntensity 0 --maxIntensity 65535 " + "{params.block_size} {params.block_size_factor} {params.channel}" diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index e6b7ad0..88d40e7 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -312,7 +312,7 @@ def get_output_ome_zarr_uri(): datatype="micr", sample="{sample}", acq="{acq}", - suffix="SPIM.ome.zarr", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), ) else: return "local://" + _bids( @@ -321,7 +321,7 @@ def get_output_ome_zarr_uri(): datatype="micr", sample="{sample}", acq="{acq}", - suffix="SPIM.ome.zarr", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), ) @@ -368,37 +368,6 @@ def get_output_ome_zarr(acq_type): } -def get_input_ome_zarr_to_nii(): - if is_remote(config["root"]): - return bids( - root=root, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix="SPIM.{extension}".format(extension=get_extension_ome_zarr()), - ) - else: - if config["write_ome_zarr_direct"]: - return bids( - root=root, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix="SPIM.{extension}".format(extension=get_extension_ome_zarr()), - ) - else: - return bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix=f"SPIM.ome.zarr", - ) - - def get_storage_creds(): """for rules that deal with remote storage directly""" protocol = Path(config["root"]).protocol diff --git a/workflow/rules/ome_zarr.smk b/workflow/rules/ome_zarr.smk index 105fb8d..0ea5854 100644 --- a/workflow/rules/ome_zarr.smk +++ b/workflow/rules/ome_zarr.smk @@ -118,7 +118,14 @@ rule ome_zarr_to_zipstore: rule ome_zarr_to_nii: input: **get_storage_creds(), - zarr=get_input_ome_zarr_to_nii(), + zarr=bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), + ), params: channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain), uri=get_output_ome_zarr_uri(), diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index cb44298..fc32d7b 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -74,7 +74,14 @@ rule generate_whole_slice_qc: "Generates an html file to view whole slices from preprocessed images" input: **get_storage_creds(), - ome=get_input_ome_zarr_to_nii(), + zarr=bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), + ), ws_html=config["report"]["resources"]["ws_html"], params: ws_s_start=config["report"]["whole_slice_viewer"]["slice_start"], @@ -113,7 +120,14 @@ rule generate_volume_qc: "Generates an html file to view the volume rendered image" input: **get_storage_creds(), - ome=get_input_ome_zarr_to_nii(), + zarr=bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), + ), vol_viewer_dir=config["report"]["resources"]["vol_viewer_dir"], params: uri=get_output_ome_zarr_uri(), diff --git a/workflow/scripts/generate_volume_qc.py b/workflow/scripts/generate_volume_qc.py index 1fb1398..12258ff 100644 --- a/workflow/scripts/generate_volume_qc.py +++ b/workflow/scripts/generate_volume_qc.py @@ -14,8 +14,6 @@ # where html file should be written 
diff --git a/workflow/scripts/generate_volume_qc.py b/workflow/scripts/generate_volume_qc.py
index 1fb1398..12258ff 100644
--- a/workflow/scripts/generate_volume_qc.py
+++ b/workflow/scripts/generate_volume_qc.py
@@ -14,8 +14,6 @@
 # where html file should be written
 html_dest = snakemake.output.html
 
-# inputted ome-zarr path
-ome_data = snakemake.input.ome
 
 # move volume renderer into the subjects directory
 copy_tree(snakemake.input.vol_viewer_dir, resource_dir)
@@ -28,7 +26,11 @@
 fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
 
 darr = da.from_zarr(store,component='/5')
 # Get most downsampled ome-zarr image
diff --git a/workflow/scripts/generate_whole_slice_qc.py b/workflow/scripts/generate_whole_slice_qc.py
index ed64b21..ae072d8 100644
--- a/workflow/scripts/generate_whole_slice_qc.py
+++ b/workflow/scripts/generate_whole_slice_qc.py
@@ -1,7 +1,6 @@
 import os
 import math
-from ome_zarr.io import parse_url
-from ome_zarr.reader import Reader
+import dask.array as da
 import matplotlib.pyplot as plt
 import numpy as np
 from jinja2 import Environment, FileSystemLoader
@@ -19,14 +18,10 @@
 ws_s_start=snakemake.params.ws_s_start
 ws_cmap=snakemake.params.ws_cmap
 
-# input ome-zarr file
-ome= snakemake.input.ome
-
 # output paths
 image_dir = snakemake.output.images_dir
 out_html = snakemake.output.html
 
-from ome_zarr.io import ZarrLocation
 uri = snakemake.params.uri
 
 if is_remote(uri):
@@ -35,18 +30,19 @@
 fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
-zarrloc = ZarrLocation(store)
-# read ome-zarr data and convert to list
-proc_reader= Reader(zarrloc)
-proc_data=list(proc_reader())[0].data
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+darr = da.from_zarr(store,component='/5')
 
 os.makedirs(image_dir, exist_ok=True)
 
 channels = []
 
 # for each channel add the images of the most downsampled data
-for chan_num, channel in enumerate(proc_data[-1]):
+for chan_num, channel in enumerate(darr):
     slice = ws_s_start
     chan = []
     slices = []
diff --git a/workflow/scripts/ome_zarr_to_nii.py b/workflow/scripts/ome_zarr_to_nii.py
index db7ffa8..728e1c8 100644
--- a/workflow/scripts/ome_zarr_to_nii.py
+++ b/workflow/scripts/ome_zarr_to_nii.py
@@ -18,7 +18,12 @@
 fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
 
 zi = zarr.open(store=store,mode='r')
diff --git a/workflow/scripts/zarr_to_n5_bdv.py b/workflow/scripts/zarr_to_n5_bdv.py
index 5a3c356..1ed8675 100644
--- a/workflow/scripts/zarr_to_n5_bdv.py
+++ b/workflow/scripts/zarr_to_n5_bdv.py
@@ -99,7 +99,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5):
     ds_list=[] #for setup-level attrs
     for ds in range(max_downsampling_layers):
         step=2**ds #1,2,4,8..
-        zstack = da.squeeze(darr[tile_i,chan_i,:,::step,::step]).astype(np.int16)
+        zstack = da.squeeze(darr[tile_i,chan_i,:,::step,::step])
         print(f'writing to setup{setup_i}/timepoint0/s{ds}')
         with ProgressBar():
             zstack.to_zarr(n5_store,component=f'setup{setup_i}/timepoint0/s{ds}',overwrite=True,compute=True)
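Note: the same ZipStore/FSStore branch is now repeated in three scripts; if it spreads further it could be factored into a shared helper. A sketch against the zarr-python 2.x API (the helper name is made up; `fs` would come from `get_fsspec()` as in the scripts above):

```python
from pathlib import Path
import zarr

def open_ome_zarr_store(path, fs=None):
    """Read-only store for a zipped or directory-based OME-Zarr."""
    if Path(path).suffix == '.zip':
        # a .zip output is read directly from disk; no fsspec filesystem needed
        return zarr.storage.ZipStore(path, dimension_separator='/', mode='r')
    # FSStore lets the same code read local or remote (fsspec) stores
    return zarr.storage.FSStore(path, fs=fs, dimension_separator='/', mode='r')
```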