use bigstitcher-spark exclusively instead of fiji
- can consider cleaning up repo and deps because of this, maybe once we
  test it out on more samples
- also fixes an ome_zarr_to_nii bug when dealing with ome.zarr.zip
akhanf committed Sep 25, 2024
1 parent d1fa55c commit f60d1e0
Showing 10 changed files with 109 additions and 82 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -63,7 +63,7 @@ or for snakemake<8.0, use:
 snakemake -c all --use-singularity
 ```
 
-Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher:mem_mb=60000 fuse_dataset:mem_mb=100000`
+Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000`
 
 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/
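For context, a complete invocation using the renamed rule resources might look like the following sketch (memory values are the README's examples, not requirements):

```
snakemake -c all --use-singularity \
  --set-resources bigstitcher_spark_stitching:mem_mb=60000 \
                  bigstitcher_spark_fusion:mem_mb=100000
```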
28 changes: 13 additions & 15 deletions config/config.yml
@@ -6,7 +6,7 @@ work: 'work'
 
 remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container
 
-write_ome_zarr_direct: False #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk
+write_ome_zarr_direct: True #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk
 
 cores_per_rule: 32
 
@@ -36,18 +36,18 @@ bigstitcher:
     downsample_in_x: 4
     downsample_in_y: 4
     downsample_in_z: 1
-    method: "phase_corr"
-    methods:
-      phase_corr: "Phase Correlation"
-      optical_flow: "Lucas-Kanade"
+    method: "phase_corr" #unused
+    methods: #unused
+      phase_corr: "Phase Correlation"
+      optical_flow: "Lucas-Kanade"
   filter_pairwise_shifts:
-    enabled: 1
-    min_r: 0.2
-    max_shift_total: 50
+    enabled: 1 #unused
+    min_r: 0.7
+    max_shift_total: 50
   global_optimization:
     enabled: 1
-    method: ONE_ROUND_SIMPLE
-    methods:
+    method: TWO_ROUND_ITERATIVE
+    methods: #unused, only for reference
       ONE_ROUND_SIMPLE: "One-Round"
       ONE_ROUND_ITERATIVE: "One-Round with iterative dropping of bad links"
       TWO_ROUND_SIMPLE: "Two-Round using metadata to align unconnected Tiles"
@@ -64,7 +64,7 @@ bigstitcher:
     block_size_factor_z: 32
 
 ome_zarr:
-  desc: sparkstitchedflatcorr
+  desc: sparkstitchedraw #flatcorr
   max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
   rechunk_size: #z, y, x
     - 1
@@ -100,7 +100,7 @@ ome_zarr:
     id: 0
     name: spim
   version: "0.4"
-  use_zipstore: True #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr
+  use_zipstore: False #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr
 
 nifti:
   levels: #cannot be higher than max_downsampling_layers in ome_zarr
@@ -155,7 +155,5 @@ report:
 
 
 containers:
-  #spimprep: 'docker://khanlab/spimprep-deps:main'
-  spimprep: '/local/scratch/spimprep-deps_spark.sif'
-  # updated.sif'
+  spimprep: 'docker://khanlab/spimprep-deps:main'
 
2 changes: 1 addition & 1 deletion workflow/Snakefile
@@ -33,7 +33,7 @@ rule all:
     input:
         get_all_targets(),
         get_bids_toplevel_targets(),
-        # get_qc_targets(), #need to skip this if using prestitched
+        get_qc_targets(), #need to skip this if using prestitched
     localrule: True
 
 
84 changes: 57 additions & 27 deletions workflow/rules/bigstitcher.smk
@@ -93,7 +93,7 @@ rule zarr_to_bdv:
     script:
         "../scripts/zarr_to_n5_bdv.py"
 
-
+"""
 rule bigstitcher:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
@@ -112,7 +112,7 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcherproc.sh",
+                suffix="bigstitcherfiji.sh",
             )
         ),
         dataset_xml=temp(
@@ -123,13 +123,13 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcher.xml",
+                suffix="bigstitcherfiji.xml",
             )
         ),
     benchmark:
         bids(
             root="benchmarks",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -139,7 +139,7 @@ rule bigstitcher:
     log:
         bids(
             root="logs",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -159,19 +159,30 @@ rule bigstitcher:
         " {params.fiji_launcher_cmd} && "
         " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} "
         " && {output.launcher} |& tee {log} && {params.rm_old_xml}"
+"""
 
 
 rule bigstitcher_spark_stitching:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
         dataset_xml=rules.zarr_to_bdv.output.bdv_xml,
     params:
-        downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'],
-            dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'],
-            dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']),
-        min_r='--minR={min_r}'.format(min_r=config['bigstitcher']['filter_pairwise_shifts']['min_r']),
-        max_shift='--maxShiftTotal={max_shift}'.format(max_shift=config['bigstitcher']['filter_pairwise_shifts']['max_shift_total']),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        min_r="--minR={min_r}".format(
+            min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"]
+        ),
+        max_shift="--maxShiftTotal={max_shift}".format(
+            max_shift=config["bigstitcher"]["filter_pairwise_shifts"][
+                "max_shift_total"
+            ]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
     output:
         dataset_xml=temp(
             bids(
@@ -215,19 +215,27 @@ rule bigstitcher_spark_stitching:
     shell:
         "cp {input.dataset_xml} {output.dataset_xml} && "
         "stitching {params.mem_gb} {threads} -x {output.dataset_xml} "
-        " {params.min_r} {params.downsampling} "
+        " {params.min_r} {params.downsampling} {params.max_shift} && "
+        "{params.rm_old_xml}"
 
 
 rule bigstitcher_spark_solver:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
         dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml,
     params:
-        downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'],
-            dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'],
-            dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']),
-        method='--method={method}'.format(method=config['bigstitcher']['global_optimization']['method']),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        method="--method={method}".format(
+            method=config["bigstitcher"]["global_optimization"]["method"]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
     output:
         dataset_xml=temp(
             bids(
@@ -271,12 +271,13 @@ rule bigstitcher_spark_solver:
     shell:
         "cp {input.dataset_xml} {output.dataset_xml} && "
         "solver {params.mem_gb} {threads} -x {output.dataset_xml} "
-        " -s STITCHING --lambda 0.1 " #lambda 0.1 is default (can expose this if needed)
-        " {params.method} "
-
+        " -s STITCHING --lambda 0.1 "
+        " {params.method} && "
+        "{params.rm_old_xml}"
+    #lambda 0.1 is default (can expose this if needed)
 
 
-rule fuse_dataset:
+rule bigstitcher_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -294,7 +314,11 @@ rule fuse_dataset:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'),
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
@@ -363,7 +387,7 @@ rule fuse_dataset:
         " && {output.launcher} |& tee {log}"
 
 
-rule fuse_dataset_spark:
+rule bigstitcher_spark_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -381,7 +405,11 @@ rule fuse_dataset_spark:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'),
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
@@ -398,7 +426,9 @@ rule fuse_dataset_spark:
             bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"],
             bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"],
         ),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
     output:
         zarr=temp(
             directory(
@@ -440,7 +470,7 @@ rule fuse_dataset_spark:
         config["containers"]["spimprep"]
     resources:
         runtime=30,
-        mem_mb=40000,
+        mem_mb=30000,
     threads: config["cores_per_rule"]
     group:
         "preproc"
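For reference, the commands these two Spark rules render would look roughly like the sketch below; the XML path and the positional memory (GB) and thread arguments are illustrative, while the flags and values mirror the params and config defaults above:

```
# pairwise shift calculation (bigstitcher-spark)
stitching 60 32 -x dataset.xml \
    --minR=0.7 --downsampling=4,4,1 --maxShiftTotal=50

# global optimization over the computed shifts
solver 60 32 -x dataset.xml \
    -s STITCHING --lambda 0.1 --method=TWO_ROUND_ITERATIVE
```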
7 changes: 0 additions & 7 deletions workflow/rules/common.smk
@@ -368,13 +368,6 @@ def get_output_ome_zarr(acq_type):
     }
 
 
-def get_output_ome_zarr_as_input(wildcards):
-    if 'blaze' in wildcards.acq:
-        return get_output_ome_zarr('blaze')
-    elif 'prestitched' in wildcards.acq:
-        return get_output_ome_zarr('prestitched')
-
-
 def get_storage_creds():
     """for rules that deal with remote storage directly"""
     protocol = Path(config["root"]).protocol
15 changes: 8 additions & 7 deletions workflow/rules/ome_zarr.smk
@@ -117,14 +117,15 @@ rule ome_zarr_to_zipstore:
 
 rule ome_zarr_to_nii:
     input:
-        zarr=bids(
-            root=root,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr())),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
     params:
         channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain),
         uri=get_output_ome_zarr_uri(),
18 changes: 16 additions & 2 deletions workflow/rules/qc.smk
@@ -73,8 +73,15 @@ rule generate_flatfield_qc:
 rule generate_whole_slice_qc:
     "Generates an html file to view whole slices from preprocessed images"
     input:
-        unpack(get_output_ome_zarr_as_input),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
         ws_html=config["report"]["resources"]["ws_html"],
     params:
         ws_s_start=config["report"]["whole_slice_viewer"]["slice_start"],
@@ -112,8 +119,15 @@ rule generate_volume_qc:
 rule generate_volume_qc:
     "Generates an html file to view the volume rendered image"
     input:
-        unpack(get_output_ome_zarr_as_input),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
         vol_viewer_dir=config["report"]["resources"]["vol_viewer_dir"],
     params:
         uri=get_output_ome_zarr_uri(),
8 changes: 5 additions & 3 deletions workflow/scripts/generate_volume_qc.py
@@ -14,8 +14,6 @@
 # where html file should be written
 html_dest = snakemake.output.html
 
-# inputted ome-zarr path
-ome_data = snakemake.input.zarr
-
 # move volume renderer into the subjects directory
 copy_tree(snakemake.input.vol_viewer_dir, resource_dir)
@@ -28,7 +26,11 @@
     fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
 darr = da.from_zarr(store,component='/5')
 