use zipstores for intermediate files
- define a separate work folder (workn5) for bigstitcher input (n5) and output (zarr),
  since these are not zipstores
- hopefully on graham, can use SLURM_TMPDIR for workn5, and project
  space for bids and work (if zipstores enabled)
akhanf committed Oct 27, 2024
1 parent 4562271 commit d3860ba
Showing 10 changed files with 103 additions and 100 deletions.
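
The change applied throughout the diffs below: when use_zipstore is enabled, intermediate arrays are written into a single .zarr.zip archive through zarr's ZipStore, rather than as a chunk-per-file DirectoryStore. A minimal sketch of that write path, with a dummy array and a made-up filename (it mirrors the edit to apply_basic_flatfield_corr_zarr.py at the bottom of this commit):

```python
import dask.array as da
import zarr

# dummy array standing in for a real tile stack
arr = da.zeros((1, 1, 256, 256, 256), chunks=(1, 1, 128, 128, 128))

use_zipstore = True
if use_zipstore:
    # one file on disk: friendlier to inode quotas on shared project space
    store = zarr.storage.ZipStore("SPIM.zarr.zip", dimension_separator="/", mode="w")
else:
    # conventional chunk-per-file layout
    store = zarr.storage.DirectoryStore("SPIM.zarr", dimension_separator="/")

da.to_zarr(arr, store, overwrite=True)

if use_zipstore:
    store.close()  # finalizes the zip central directory; zarr docs advise closing ZipStores after writing
```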
17 changes: 11 additions & 6 deletions config/config.yml
@@ -1,15 +1,16 @@
 samples: 'config/samples.tsv'
 
 root: 'bids' # can use a s3:// or gcs:// prefix to write output to cloud storage
-work: 'work'
+work: 'work' #intermediate files, uses zipstores if enabled
+workn5: 'workn5' #where the bigstitcher temp files go (not using zipstores)
 
 remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container
 
-use_zipstore: False #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr
+use_zipstore: True #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr, and zarr.zip for intermediate files
 
 #total resources available, used to set rule resources
-total_cores: 32
-total_mem_mb: 128000
+total_cores: 32
+total_mem_mb: 128000
 
 #import wildcards: tilex, tiley, channel, zslice (and prefix - unused)
 import_blaze:
@@ -31,10 +32,14 @@ basic_flatfield_corr:
   smoothness_flatfield: 1.0
   smoothness_darkfield: 1.0
   sparse_cost_darkfield: 0.01
+  output_chunks:
+    - 128
+    - 128
+    - 128
 
 
 bigstitcher:
-  use_interestpoints: False
+  use_interestpoints: True
   interest_points:
     downsample_xy: 4
     min_intensity: 0
@@ -72,7 +77,7 @@ bigstitcher:
     block_size_factor_z: 32
 
 ome_zarr:
-  desc: stitchedflatcorr
+  desc: stitchedraw
   max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
   rechunk_size: #z, y, x
     - 1
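
Since the Snakefile expands environment variables in these paths (next file), the layout the commit message hopes for on graham could be expressed directly in the config. A sketch only: the $SLURM_TMPDIR path below is an assumption taken from the commit message, not a shipped default:

```yaml
root: 'bids'                    # final outputs on project space
work: 'work'                    # intermediates, written as .zarr.zip when use_zipstore is on
workn5: '$SLURM_TMPDIR/workn5'  # node-local scratch for the BigStitcher n5/zarr temp files
use_zipstore: True
```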
1 change: 1 addition & 0 deletions workflow/Snakefile
@@ -15,6 +15,7 @@ container: config["containers"]["spimprep"]
 # use expandvars so we can use e.g. '$SLURM_TMPDIR'
 root = os.path.expandvars(config["root"])
 work = os.path.expandvars(config["work"])
+workn5 = os.path.expandvars(config["workn5"])
 resampled = Path(root) / "derivatives" / "resampled"
26 changes: 14 additions & 12 deletions workflow/rules/bigstitcher.smk
@@ -7,15 +7,17 @@ rule zarr_to_bdv:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="SPIM.zarr",
+            suffix="SPIM.{ext}".format(
+                ext="zarr.zip" if config["use_zipstore"] else "zarr"
+            ),
         ),
         metadata_json=rules.copy_blaze_metadata.output.metadata_json,
     params:
         max_downsampling_layers=5,
         temp_h5=str(
             Path(
                 bids(
-                    root=work,
+                    root=workn5,
                     subject="{subject}",
                     datatype="micr",
                     sample="{sample}",
@@ -29,7 +31,7 @@ rule zarr_to_bdv:
         temp_xml=str(
             Path(
                 bids(
-                    root=work,
+                    root=workn5,
                     subject="{subject}",
                     datatype="micr",
                     sample="{sample}",
@@ -44,7 +46,7 @@ rule zarr_to_bdv:
         bdv_n5=temp(
             directory(
                 bids(
-                    root=work,
+                    root=workn5,
                     subject="{subject}",
                     datatype="micr",
                     sample="{sample}",
@@ -56,7 +58,7 @@ rule zarr_to_bdv:
         ),
         bdv_xml=temp(
             bids(
-                root=work,
+                root=workn5,
                 subject="{subject}",
                 datatype="micr",
                 sample="{sample}",
@@ -119,7 +121,7 @@ rule bigstitcher_stitching:
     output:
         dataset_xml=temp(
             bids(
-                root=work,
+                root=workn5,
                 subject="{subject}",
                 datatype="micr",
                 sample="{sample}",
@@ -193,7 +195,7 @@ rule bigstitcher_detect_interestpoints:
     output:
         dataset_xml=temp(
             bids(
-                root=work,
+                root=workn5,
                 subject="{subject}",
                 datatype="micr",
                 sample="{sample}",
@@ -252,7 +254,7 @@ rule bigstitcher_match_interestpoints:
     output:
         dataset_xml=temp(
             bids(
-                root=work,
+                root=workn5,
                 subject="{subject}",
                 datatype="micr",
                 sample="{sample}",
@@ -320,7 +322,7 @@ rule bigstitcher_solver:
     output:
         dataset_xml=temp(
             bids(
-                root=work,
+                root=workn5,
                 subject="{subject}",
                 datatype="micr",
                 sample="{sample}",
@@ -368,7 +370,7 @@ rule bigstitcher_solver:
 rule bigstitcher_fusion:
     input:
         dataset_n5=bids(
-            root=work,
+            root=workn5,
             subject="{subject}",
             datatype="micr",
             sample="{sample}",
@@ -377,7 +379,7 @@ rule bigstitcher_fusion:
             suffix="bdv.n5",
         ),
         dataset_xml=bids(
-            root=work,
+            root=workn5,
             subject="{subject}",
             datatype="micr",
             sample="{sample}",
@@ -410,7 +412,7 @@ rule bigstitcher_fusion:
         zarr=temp(
             directory(
                 bids(
-                    root=work,
+                    root=workn5,
                     subject="{subject}",
                     datatype="micr",
                     sample="{sample}",
11 changes: 9 additions & 2 deletions workflow/rules/common.smk
@@ -82,6 +82,13 @@ def get_extension_ome_zarr():
     return "ome.zarr"
 
 
+def intermediate_zarr(path):
+    if config["use_zipstore"]:
+        return temp(path + ".zarr.zip")
+    else:
+        return temp(directory(path + ".zarr"))
+
+
 # targets
 def get_all_targets():
     targets = []
@@ -302,12 +309,12 @@ def get_output_ome_zarr(acq_type):
     if config["use_zipstore"]:
         return {
             "zarr": bids(
-                root=work,
+                root=root,
                 subject="{subject}",
                 datatype="micr",
                 sample="{sample}",
                 acq=f"{{acq,[a-zA-Z0-9]*{acq_type}[a-zA-Z0-9]*}}",
-                suffix="SPIM.ome.zarr",
+                suffix="SPIM.ome.zarr.zip",
             )
         }
     else:
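
Anything consuming these intermediates has to branch on which extension the helper chose. A minimal reader sketch handling both forms (the name open_intermediate is hypothetical; the suffix check mirrors the one added to apply_basic_flatfield_corr_zarr.py below):

```python
from pathlib import Path

import dask.array as da
import zarr


def open_intermediate(path):
    """Open a .zarr or .zarr.zip intermediate as a dask array."""
    if Path(path).suffix == ".zip":
        # zip archive members are immutable, so open read-only
        store = zarr.storage.ZipStore(path, mode="r")
    else:
        store = zarr.storage.DirectoryStore(path)
    return da.from_zarr(store)
```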
62 changes: 32 additions & 30 deletions workflow/rules/flatfield_corr.smk
@@ -2,15 +2,17 @@
 rule fit_basic_flatfield_corr:
     """ BaSiC flatfield correction"""
     input:
-        zarr=lambda wildcards: bids(
-            root=work,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            desc="rawfromgcs" if sample_is_remote(wildcards) else "raw",
-            suffix="SPIM.zarr",
-        ).format(**wildcards),
+        zarr=lambda wildcards: intermediate_zarr(
+            bids(
+                root=work,
+                subject="{subject}",
+                datatype="micr",
+                sample="{sample}",
+                acq="{acq}",
+                desc="rawfromgcs" if sample_is_remote(wildcards) else "raw",
+                suffix="SPIM",
+            ).format(**wildcards)
+        ),
     params:
         channel=lambda wildcards: get_stains(wildcards).index(wildcards.stain),
         max_n_images=config["basic_flatfield_corr"]["max_n_images"],
@@ -64,34 +66,34 @@ rule fit_basic_flatfield_corr:
 rule apply_basic_flatfield_corr:
     """ apply BaSiC flatfield correction """
     input:
-        zarr=lambda wildcards: bids(
-            root=work,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            desc="rawfromgcs" if sample_is_remote(wildcards) else "raw",
-            suffix="SPIM.zarr",
-        ).format(**wildcards),
+        zarr=lambda wildcards: intermediate_zarr(
+            bids(
+                root=work,
+                subject="{subject}",
+                datatype="micr",
+                sample="{sample}",
+                acq="{acq}",
+                desc="rawfromgcs" if sample_is_remote(wildcards) else "raw",
+                suffix="SPIM",
+            ).format(**wildcards)
+        ),
         model_dirs=lambda wildcards: expand(
             rules.fit_basic_flatfield_corr.output.model_dir,
             stain=get_stains(wildcards),
             allow_missing=True,
         ),
     params:
-        out_chunks=[128, 128, 128],
+        out_chunks=config["basic_flatfield_corr"]["output_chunks"],
     output:
-        zarr=temp(
-            directory(
-                bids(
-                    root=work,
-                    subject="{subject}",
-                    datatype="micr",
-                    sample="{sample}",
-                    acq="{acq}",
-                    desc="flatcorr",
-                    suffix="SPIM.zarr",
-                )
-            )
+        zarr=intermediate_zarr(
+            bids(
+                root=work,
+                subject="{subject}",
+                datatype="micr",
+                sample="{sample}",
+                acq="{acq}",
+                desc="flatcorr",
+                suffix="SPIM",
+            )
         ),
     benchmark:
22 changes: 11 additions & 11 deletions workflow/rules/import.smk
@@ -193,18 +193,18 @@ rule tif_to_zarr:
         metadata_json=rules.copy_blaze_metadata.output.metadata_json,
     params:
         intensity_rescaling=config["import_blaze"]["intensity_rescaling"],
-        do_rechunking=True if config["ome_zarr"]["desc"][-3:] == "raw" else False,
+        out_chunks=config["basic_flatfield_corr"]["output_chunks"],
     output:
-        zarr=temp(
-            directory(
-                bids(
-                    root=work,
-                    subject="{subject}",
-                    datatype="micr",
-                    sample="{sample}",
-                    acq="{acq}",
-                    desc="raw",
-                    suffix="SPIM.zarr",
-                )
-            )
+        zarr=intermediate_zarr(
+            bids(
+                root=work,
+                subject="{subject}",
+                datatype="micr",
+                sample="{sample}",
+                acq="{acq}",
+                desc="raw",
+                suffix="SPIM",
+            )
         ),
     benchmark:
37 changes: 1 addition & 36 deletions workflow/rules/ome_zarr.smk
@@ -5,7 +5,7 @@ rule zarr_to_ome_zarr:
         **get_storage_creds(),
         zarr=lambda wildcards: expand(
             bids(
-                root=work,
+                root=workn5,
                 subject="{subject}",
                 datatype="micr",
                 sample="{sample}",
@@ -89,41 +89,6 @@ rule tif_stacks_to_ome_zarr:
         "../scripts/tif_stacks_to_ome_zarr.py"
 
 
-rule ome_zarr_to_zipstore:
-    """ use 7zip to create a zipstore """
-    input:
-        zarr=bids(
-            root=work,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            suffix="SPIM.ome.zarr",
-        ),
-    output:
-        zarr_zip=bids(
-            root=root,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            suffix="SPIM.ome.zarr.zip",
-        ),
-    log:
-        bids(
-            root="logs",
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            suffix="log.txt",
-        ),
-    group:
-        "preproc"
-    shell:
-        "7z a -mx0 -tzip {output.zarr_zip} {input.zarr}/. &> {log}"
-
-
 rule ome_zarr_to_nii:
     input:
         **get_storage_creds(),
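
The removed rule repacked a finished OME-Zarr with 7z at compression level 0 (-mx0); writing through zarr's ZipStore reaches the same end state without the extra copy. For what it's worth, ZipStore in zarr-python v2 also stores members uncompressed by default, so the on-disk result should be comparable (a note based on the library default, not on anything in this commit):

```python
import zipfile

import zarr

# zarr-python v2 defaults to compression=zipfile.ZIP_STORED (no compression),
# matching the 7z -mx0 behaviour of the removed rule
store = zarr.storage.ZipStore("SPIM.ome.zarr.zip", mode="w", compression=zipfile.ZIP_STORED)
```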
10 changes: 9 additions & 1 deletion workflow/scripts/apply_basic_flatfield_corr_zarr.py
@@ -2,6 +2,7 @@
 from basicpy import BaSiC
 from pathlib import Path
 import numpy as np
+import zarr
 from skimage.transform import resize
 import dask.array as da
 from dask.diagnostics import ProgressBar
@@ -49,5 +50,12 @@ def apply_basic_parallel(x):
 #stack along chans
 arr_stacked = da.stack(chan_arr_list,axis=1).rechunk([1,1] + snakemake.params.out_chunks)
 
+#now we can do the computation itself, storing to zarr
+if Path(snakemake.output.zarr).suffix == '.zip':
+    store = zarr.storage.ZipStore(snakemake.output.zarr,dimension_separator='/',mode='w')
+else:
+    store = zarr.storage.DirectoryStore(snakemake.output.zarr,dimension_separator='/')
+
+
 with ProgressBar():
-    da.to_zarr(arr_stacked,snakemake.output.zarr,overwrite=True,dimension_separator='/')
+    da.to_zarr(arr_stacked,store,overwrite=True)
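
A quick way to sanity-check a .zarr.zip this script produces (the filename is illustrative):

```python
import zipfile

with zipfile.ZipFile("sub-01_sample-brain_acq-blaze_desc-flatcorr_SPIM.zarr.zip") as zf:
    print(len(zf.namelist()), "members")  # chunk files plus .zarray/.zattrs metadata
    assert zf.testzip() is None  # testzip() returns the first corrupt member, or None if all CRCs pass
```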