From 04573849c7a38c38dd9e6f976472e767e165dd7b Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Sat, 14 Sep 2024 11:17:21 -0400
Subject: [PATCH 1/9] Update chunking for fusion

Trying to fix the black bar bug
---
 config/config.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/config/config.yml b/config/config.yml
index 3d6e0a7..da9da0f 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -55,12 +55,12 @@ bigstitcher:
   fuse_dataset:
     downsampling: 1
-    block_size_x: 256 # for storage
-    block_size_y: 256
-    block_size_z: 1
-    block_size_factor_x: 1 #e.g. 2 will use 2*block_size for computation
-    block_size_factor_y: 1
-    block_size_factor_z: 256
+    block_size_x: 128 # for storage
+    block_size_y: 128
+    block_size_z: 128
+    block_size_factor_x: 2 #e.g. 2 will use 2*block_size for computation
+    block_size_factor_y: 2
+    block_size_factor_z: 2
 
 ome_zarr:
   desc: stitchedflatcorr

From 8943f639c6f27ee3af694bf5f7e1168b71d63ac5 Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Sun, 15 Sep 2024 10:39:21 -0400
Subject: [PATCH 2/9] fix type casting

The cast was to int16 but the datatype was specified as uint16, which
could explain why the black bar problem got worse. Hopefully using int16
(instead of uint16) consistently actually solves it now.
---
 workflow/scripts/zarr_to_n5_bdv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/scripts/zarr_to_n5_bdv.py b/workflow/scripts/zarr_to_n5_bdv.py
index 5a3c356..48fb89e 100644
--- a/workflow/scripts/zarr_to_n5_bdv.py
+++ b/workflow/scripts/zarr_to_n5_bdv.py
@@ -111,6 +111,6 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5):
     #add attributes for downsampling as a list, and datatype to the setup# level
     g = zarr.open_group(store=n5_store,path=f'setup{setup_i}',mode='r+')
     g.attrs['downsamplingFactors']=ds_list
-    g.attrs['dataType']='uint16'
+    g.attrs['dataType']='int16'
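
Aside: the dtype mismatch that patches 2, 3, and 5 wrestle with is easy to reproduce in isolation. A minimal numpy sketch (illustrative only, not part of any patch; the array values are made up):

```python
import numpy as np

# Plausible 16-bit camera intensities, including values above the int16 max (32767).
raw = np.array([100, 40000, 65535], dtype=np.uint16)

# Casting to int16 wraps the large values around to negative numbers...
as_int16 = raw.astype(np.int16)
print(as_int16)                    # [  100 -25536     -1]

# ...and any consumer that clamps negatives to zero renders those pixels black.
print(np.clip(as_int16, 0, None))  # [100   0   0]

# Reinterpreting the same bits as uint16 recovers the original values, which is
# why the cast dtype and the declared 'dataType' attribute must agree.
print(as_int16.view(np.uint16))    # [  100 40000 65535]
```
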
From 3dedae212b48e73377b55fb8ef4c328b1ceb405a Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Sun, 15 Sep 2024 10:41:59 -0400
Subject: [PATCH 3/9] use int16 throughout

---
 workflow/scripts/apply_basic_flatfield_corr_zarr.py | 2 +-
 workflow/scripts/tif_to_zarr.py                     | 2 +-
 workflow/scripts/tif_to_zarr_gcs.py                 | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/workflow/scripts/apply_basic_flatfield_corr_zarr.py b/workflow/scripts/apply_basic_flatfield_corr_zarr.py
index acbbb4e..01bdddf 100644
--- a/workflow/scripts/apply_basic_flatfield_corr_zarr.py
+++ b/workflow/scripts/apply_basic_flatfield_corr_zarr.py
@@ -41,7 +41,7 @@
 
     #now we want to apply correction to all images
     #define a function to map
     def apply_basic_parallel(x):
-        return np.reshape(basic.transform(x.squeeze()),(1,1,img_shape[0],img_shape[1])).astype('uint16')
+        return np.reshape(basic.transform(x.squeeze()),(1,1,img_shape[0],img_shape[1])).astype('int16')
     arr_corr = da.map_blocks(apply_basic_parallel,arr_chan)
     chan_arr_list.append(arr_corr)
 
diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py
index bfd4a86..1b8aa75 100644
--- a/workflow/scripts/tif_to_zarr.py
+++ b/workflow/scripts/tif_to_zarr.py
@@ -59,7 +59,7 @@ def single_imread(*args):
 
 #rescale intensities, and recast
 darr = darr * snakemake.params.intensity_rescaling
-darr = darr.astype('uint16')
+darr = darr.astype('int16')
 
 #now we can do the computation itself, storing to zarr
 print('writing images to zarr with dask')
diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py
index 01a9af1..c49a770 100644
--- a/workflow/scripts/tif_to_zarr_gcs.py
+++ b/workflow/scripts/tif_to_zarr_gcs.py
@@ -77,7 +77,7 @@ def build_zstack(gcs_uris,fs):
 
 #rescale intensities, and recast
 darr = darr * snakemake.params.intensity_rescaling
-darr = darr.astype('uint16')
+darr = darr.astype('int16')
 
 #now we can do the computation itself, storing to zarr
 print('writing images to zarr with dask')

From 068d422735a5a14f25dec966bfd98071a8200a05 Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Sun, 15 Sep 2024 10:50:28 -0400
Subject: [PATCH 4/9] revert back to 2d output chunks

---
 config/config.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/config/config.yml b/config/config.yml
index da9da0f..3d6e0a7 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -55,12 +55,12 @@ bigstitcher:
   fuse_dataset:
     downsampling: 1
-    block_size_x: 128 # for storage
-    block_size_y: 128
-    block_size_z: 128
-    block_size_factor_x: 2 #e.g. 2 will use 2*block_size for computation
-    block_size_factor_y: 2
-    block_size_factor_z: 2
+    block_size_x: 256 # for storage
+    block_size_y: 256
+    block_size_z: 1
+    block_size_factor_x: 1 #e.g. 2 will use 2*block_size for computation
+    block_size_factor_y: 1
+    block_size_factor_z: 256
 
 ome_zarr:
   desc: stitchedflatcorr
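
Aside: the relationship between the storage block size and the computation block size in the config above is a per-axis product, per the config's own comment ("e.g. 2 will use 2*block_size for computation"). A small illustrative sketch with the reverted values:

```python
# Illustrative only: derive the effective computation block per axis from the
# reverted settings (2D storage chunks of 256x256x1, with a z factor of 256).
block_size = {"x": 256, "y": 256, "z": 1}
block_size_factor = {"x": 1, "y": 1, "z": 256}

compute_block = {ax: block_size[ax] * block_size_factor[ax] for ax in "xyz"}
print(compute_block)  # {'x': 256, 'y': 256, 'z': 256}
# i.e. fusion computes on 256^3 bricks but stores 2D 256x256x1 chunks.
```
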
+ "affine-fusion --preserveAnisotropy -x {input.dataset_xml} " + " -o {output.zarr} -d /fused/s0 -s ZARR " + " --UINT16 --minIntensity 0 --maxIntensity 65535 " + + +# " --UINT8 --minIntensity 0 --maxIntensity 255 " +"{params}" # all the params diff --git a/workflow/scripts/apply_basic_flatfield_corr_zarr.py b/workflow/scripts/apply_basic_flatfield_corr_zarr.py index 01bdddf..acbbb4e 100644 --- a/workflow/scripts/apply_basic_flatfield_corr_zarr.py +++ b/workflow/scripts/apply_basic_flatfield_corr_zarr.py @@ -41,7 +41,7 @@ #now we want to apply correction to all images #define a function to map def apply_basic_parallel(x): - return np.reshape(basic.transform(x.squeeze()),(1,1,img_shape[0],img_shape[1])).astype('int16') + return np.reshape(basic.transform(x.squeeze()),(1,1,img_shape[0],img_shape[1])).astype('uint16') arr_corr = da.map_blocks(apply_basic_parallel,arr_chan) chan_arr_list.append(arr_corr) diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index 1b8aa75..bfd4a86 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -59,7 +59,7 @@ def single_imread(*args): #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling -darr = darr.astype('int16') +darr = darr.astype('uint16') #now we can do the computation itself, storing to zarr print('writing images to zarr with dask') diff --git a/workflow/scripts/tif_to_zarr_gcs.py b/workflow/scripts/tif_to_zarr_gcs.py index c49a770..01a9af1 100644 --- a/workflow/scripts/tif_to_zarr_gcs.py +++ b/workflow/scripts/tif_to_zarr_gcs.py @@ -77,7 +77,7 @@ def build_zstack(gcs_uris,fs): #rescale intensities, and recast darr = darr * snakemake.params.intensity_rescaling -darr = darr.astype('int16') +darr = darr.astype('uint16') #now we can do the computation itself, storing to zarr print('writing images to zarr with dask') diff --git a/workflow/scripts/zarr_to_n5_bdv.py b/workflow/scripts/zarr_to_n5_bdv.py index 48fb89e..1ed8675 100644 --- a/workflow/scripts/zarr_to_n5_bdv.py +++ b/workflow/scripts/zarr_to_n5_bdv.py @@ -99,7 +99,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5): ds_list=[] #for setup-level attrs for ds in range(max_downsampling_layers): step=2**ds #1,2,4,8.. 
From c3586de292efb49b572da00a0b22468ff6e340ed Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Tue, 24 Sep 2024 23:43:11 -0400
Subject: [PATCH 6/9] updates for fuse-spark

---
 workflow/rules/bigstitcher.smk | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk
index 62d39f2..5a47aa2 100644
--- a/workflow/rules/bigstitcher.smk
+++ b/workflow/rules/bigstitcher.smk
@@ -283,6 +283,7 @@ rule fuse_dataset_spark:
             bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"],
             bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"],
         ),
+        mem_gb=lambda wikdcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
     output:
         zarr=temp(
             directory(
@@ -329,10 +330,7 @@ rule fuse_dataset_spark:
     group:
         "preproc"
     shell:
-        "affine-fusion --preserveAnisotropy -x {input.dataset_xml} "
+        "affine-fusion {params.mem_gb} {threads} --preserveAnisotropy -x {input.dataset_xml} "
         " -o {output.zarr} -d /fused/s0 -s ZARR "
         " --UINT16 --minIntensity 0 --maxIntensity 65535 "
-
-
-# " --UINT8 --minIntensity 0 --maxIntensity 255 "
-"{params}" # all the params
+        "{params.block_size} {params.block_size_factor} {params.channel}"
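
Aside: judging from how the rules invoke the wrappers ("affine-fusion {params.mem_gb} {threads} ..."), the bigstitcher-spark entry points take a heap size in GB and a thread count as their leading arguments, while Snakemake tracks memory in MB — the mem_gb lambda is just that conversion (note the "wikdcards" typo here is corrected in patch 7). An illustrative standalone version:

```python
# Illustrative helper mirroring the mem_gb param: Snakemake's resources.mem_mb
# (megabytes) -> whole gigabytes for the spark wrapper's first argument.
def mem_gb_arg(mem_mb: int) -> str:
    return "{mem_gb}".format(mem_gb=int(mem_mb / 1000))

print(mem_gb_arg(40000))  # '40', e.g. "affine-fusion 40 32 --preserveAnisotropy ..."
```
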
"One-Round" + ONE_ROUND_ITERATIVE: "One-Round with iterative dropping of bad links" + TWO_ROUND_SIMPLE: "Two-Round using metadata to align unconnected Tiles" + TWO_ROUND_ITERATIVE: "Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links" fuse_dataset: downsampling: 1 block_size_x: 256 # for storage block_size_y: 256 - block_size_z: 1 + block_size_z: 8 block_size_factor_x: 1 #e.g. 2 will use 2*block_size for computation block_size_factor_y: 1 - block_size_factor_z: 256 + block_size_factor_z: 32 ome_zarr: - desc: stitchedflatcorr + desc: sparkstitchedflatcorr max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16} rechunk_size: #z, y, x - 1 @@ -99,7 +100,7 @@ ome_zarr: id: 0 name: spim version: "0.4" - use_zipstore: False #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr + use_zipstore: True #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr nifti: levels: #cannot be higher than max_downsampling_layers in ome_zarr @@ -154,5 +155,7 @@ report: containers: - spimprep: 'docker://khanlab/spimprep-deps:main' + #spimprep: 'docker://khanlab/spimprep-deps:main' + spimprep: '/local/scratch/spimprep-deps_spark.sif' + # updated.sif' diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk index 5a47aa2..ebd2d49 100644 --- a/workflow/rules/bigstitcher.smk +++ b/workflow/rules/bigstitcher.smk @@ -93,7 +93,7 @@ rule zarr_to_bdv: script: "../scripts/zarr_to_n5_bdv.py" - +""" rule bigstitcher: input: dataset_n5=rules.zarr_to_bdv.output.bdv_n5, @@ -159,6 +159,121 @@ rule bigstitcher: " {params.fiji_launcher_cmd} && " " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} " " && {output.launcher} |& tee {log} && {params.rm_old_xml}" +""" + +rule bigstitcher_spark_stitching: + input: + dataset_n5=rules.zarr_to_bdv.output.bdv_n5, + dataset_xml=rules.zarr_to_bdv.output.bdv_xml, + params: + downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'], + dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'], + dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']), + min_r='--minR={min_r}'.format(min_r=config['bigstitcher']['filter_pairwise_shifts']['min_r']), + max_shift='--maxShiftTotal={max_shift}'.format(max_shift=config['bigstitcher']['filter_pairwise_shifts']['max_shift_total']), + mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000)) + output: + dataset_xml=temp( + bids( + root=work, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + desc="{desc}", + suffix="bigstitcherstitching.xml", + ) + ), + benchmark: + bids( + root="benchmarks", + datatype="bigstitcherstitching", + subject="{subject}", + sample="{sample}", + acq="{acq}", + desc="{desc}", + suffix="benchmark.tsv", + ) + log: + bids( + root="logs", + datatype="bigstitcherproc", + subject="{subject}", + sample="{sample}", + acq="{acq}", + desc="{desc}", + suffix="log.txt", + ), + container: + config["containers"]["spimprep"] + resources: + runtime=30, + mem_mb=40000, + threads: config["cores_per_rule"] + group: + "preproc" + shell: + "cp {input.dataset_xml} {output.dataset_xml} && " + "stitching {params.mem_gb} {threads} -x {output.dataset_xml} " + " {params.min_r} {params.downsampling} " + + +rule bigstitcher_spark_solver: + input: + dataset_n5=rules.zarr_to_bdv.output.bdv_n5, + dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml, + params: + 
downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'], + dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'], + dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']), + method='--method={method}'.format(method=config['bigstitcher']['global_optimization']['method']), + mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000)) + output: + dataset_xml=temp( + bids( + root=work, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + desc="{desc}", + suffix="bigstitchersolver.xml", + ) + ), + benchmark: + bids( + root="benchmarks", + datatype="bigstitchersolver", + subject="{subject}", + sample="{sample}", + acq="{acq}", + desc="{desc}", + suffix="benchmark.tsv", + ) + log: + bids( + root="logs", + datatype="bigstitcherproc", + subject="{subject}", + sample="{sample}", + acq="{acq}", + desc="{desc}", + suffix="log.txt", + ), + container: + config["containers"]["spimprep"] + resources: + runtime=30, + mem_mb=40000, + threads: config["cores_per_rule"] + group: + "preproc" + shell: + "cp {input.dataset_xml} {output.dataset_xml} && " + "solver {params.mem_gb} {threads} -x {output.dataset_xml} " + " -s STITCHING --lambda 0.1 " #lambda 0.1 is default (can expose this if needed) + " {params.method} " + rule fuse_dataset: @@ -179,7 +294,7 @@ rule fuse_dataset: sample="{sample}", acq="{acq}", desc="{desc}", - suffix="bigstitcher.xml", + suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'), ), ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", params: @@ -238,7 +353,7 @@ rule fuse_dataset: config["containers"]["spimprep"] resources: runtime=30, - mem_mb=20000, + mem_mb=40000, threads: config["cores_per_rule"] group: "preproc" @@ -266,7 +381,7 @@ rule fuse_dataset_spark: sample="{sample}", acq="{acq}", desc="{desc}", - suffix="bigstitcher.xml", + suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'), ), ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm", params: @@ -283,7 +398,7 @@ rule fuse_dataset_spark: bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"], bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"], ), - mem_gb=lambda wikdcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000)) + mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000)) output: zarr=temp( directory( @@ -325,7 +440,7 @@ rule fuse_dataset_spark: config["containers"]["spimprep"] resources: runtime=30, - mem_mb=20000, + mem_mb=40000, threads: config["cores_per_rule"] group: "preproc" diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index e6b7ad0..0248dd3 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -312,7 +312,7 @@ def get_output_ome_zarr_uri(): datatype="micr", sample="{sample}", acq="{acq}", - suffix="SPIM.ome.zarr", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), ) else: return "local://" + _bids( @@ -321,7 +321,7 @@ def get_output_ome_zarr_uri(): datatype="micr", sample="{sample}", acq="{acq}", - suffix="SPIM.ome.zarr", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()), ) @@ -368,35 +368,11 @@ def get_output_ome_zarr(acq_type): } -def get_input_ome_zarr_to_nii(): - if is_remote(config["root"]): - return bids( - root=root, - subject="{subject}", - datatype="micr", - 
sample="{sample}", - acq="{acq}", - suffix="SPIM.{extension}".format(extension=get_extension_ome_zarr()), - ) - else: - if config["write_ome_zarr_direct"]: - return bids( - root=root, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix="SPIM.{extension}".format(extension=get_extension_ome_zarr()), - ) - else: - return bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - suffix=f"SPIM.ome.zarr", - ) +def get_output_ome_zarr_as_input(wildcards): + if 'blaze' in wildcards.acq: + return get_output_ome_zarr('blaze') + elif 'prestitched' in wildcards.acq: + return get_output_ome_zarr('prestitched') def get_storage_creds(): diff --git a/workflow/rules/ome_zarr.smk b/workflow/rules/ome_zarr.smk index 105fb8d..77a58b3 100644 --- a/workflow/rules/ome_zarr.smk +++ b/workflow/rules/ome_zarr.smk @@ -117,8 +117,14 @@ rule ome_zarr_to_zipstore: rule ome_zarr_to_nii: input: + zarr=bids( + root=root, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr())), **get_storage_creds(), - zarr=get_input_ome_zarr_to_nii(), params: channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain), uri=get_output_ome_zarr_uri(), diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index cb44298..a79289f 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -73,8 +73,8 @@ rule generate_flatfield_qc: rule generate_whole_slice_qc: "Generates an html file to view whole slices from preprocessed images" input: + unpack(get_output_ome_zarr_as_input), **get_storage_creds(), - ome=get_input_ome_zarr_to_nii(), ws_html=config["report"]["resources"]["ws_html"], params: ws_s_start=config["report"]["whole_slice_viewer"]["slice_start"], @@ -112,8 +112,8 @@ rule generate_whole_slice_qc: rule generate_volume_qc: "Generates an html file to view the volume rendered image" input: + unpack(get_output_ome_zarr_as_input), **get_storage_creds(), - ome=get_input_ome_zarr_to_nii(), vol_viewer_dir=config["report"]["resources"]["vol_viewer_dir"], params: uri=get_output_ome_zarr_uri(), diff --git a/workflow/scripts/generate_volume_qc.py b/workflow/scripts/generate_volume_qc.py index 1fb1398..d5c48d1 100644 --- a/workflow/scripts/generate_volume_qc.py +++ b/workflow/scripts/generate_volume_qc.py @@ -15,7 +15,7 @@ html_dest = snakemake.output.html # inputted ome-zarr path -ome_data = snakemake.input.ome +ome_data = snakemake.input.zarr # move volume renderer into the subjects directory copy_tree(snakemake.input.vol_viewer_dir, resource_dir) diff --git a/workflow/scripts/generate_whole_slice_qc.py b/workflow/scripts/generate_whole_slice_qc.py index ed64b21..f721128 100644 --- a/workflow/scripts/generate_whole_slice_qc.py +++ b/workflow/scripts/generate_whole_slice_qc.py @@ -20,7 +20,7 @@ ws_cmap=snakemake.params.ws_cmap # input ome-zarr file -ome= snakemake.input.ome +ome= snakemake.input.zarr # output paths image_dir = snakemake.output.images_dir diff --git a/workflow/scripts/ome_zarr_to_nii.py b/workflow/scripts/ome_zarr_to_nii.py index db7ffa8..720f9b4 100644 --- a/workflow/scripts/ome_zarr_to_nii.py +++ b/workflow/scripts/ome_zarr_to_nii.py @@ -18,7 +18,19 @@ fs_args={} fs = get_fsspec(uri,**fs_args) -store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r') + +print(fs) +print(uri) +print(Path(uri)) +print(Path(uri).path) + +print(Path(uri).suffix) +if Path(uri).suffix == '.zip': + store = 
zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r') +else: + store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r') + +print(store) zi = zarr.open(store=store,mode='r') From f60d1e087959acbfdb3b7a2d4ddb597e6fe15e1d Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Wed, 25 Sep 2024 15:39:36 -0400 Subject: [PATCH 8/9] use bigstitcher-spark exclusively instead of fiji - can consider cleaning up repo and deps because of this, maybe once we test it out on more samples - also fixes a ome_zarr_to_nii bug when dealing with ome.zarr.zip --- README.md | 2 +- config/config.yml | 28 ++++--- workflow/Snakefile | 2 +- workflow/rules/bigstitcher.smk | 84 ++++++++++++++------- workflow/rules/common.smk | 7 -- workflow/rules/ome_zarr.smk | 15 ++-- workflow/rules/qc.smk | 18 ++++- workflow/scripts/generate_volume_qc.py | 8 +- workflow/scripts/generate_whole_slice_qc.py | 20 ++--- workflow/scripts/ome_zarr_to_nii.py | 7 -- 10 files changed, 109 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index 7ba1275..e5d5b8d 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ or for snakemake<8.0, use: snakemake -c all --use-singularity ``` -Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher:mem_mb=60000 fuse_dataset:mem_mb=100000` +Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000` 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/ diff --git a/config/config.yml b/config/config.yml index 6a5c684..7fa2b4c 100644 --- a/config/config.yml +++ b/config/config.yml @@ -6,7 +6,7 @@ work: 'work' remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container -write_ome_zarr_direct: False #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk +write_ome_zarr_direct: True #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk cores_per_rule: 32 @@ -36,18 +36,18 @@ bigstitcher: downsample_in_x: 4 downsample_in_y: 4 downsample_in_z: 1 - method: "phase_corr" - methods: - phase_corr: "Phase Correlation" - optical_flow: "Lucas-Kanade" + method: "phase_corr" #unused + methods: #unused + phase_corr: "Phase Correlation" + optical_flow: "Lucas-Kanade" filter_pairwise_shifts: - enabled: 1 - min_r: 0.2 - max_shift_total: 50 + enabled: 1 #unused + min_r: 0.7 + max_shift_total: 50 global_optimization: enabled: 1 - method: ONE_ROUND_SIMPLE - methods: + method: TWO_ROUND_ITERATIVE + methods: #unused, only for reference ONE_ROUND_SIMPLE: "One-Round" ONE_ROUND_ITERATIVE: "One-Round with iterative dropping of bad links" TWO_ROUND_SIMPLE: "Two-Round using metadata to align unconnected Tiles" @@ -64,7 +64,7 @@ bigstitcher: block_size_factor_z: 32 ome_zarr: - desc: sparkstitchedflatcorr + desc: sparkstitchedraw #flatcorr max_downsampling_layers: 5 # e.g. 
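
Aside: the store-selection branch that patch 7 introduces in ome_zarr_to_nii.py (and patch 8 copies into the QC scripts) can be sketched as a standalone helper, assuming zarr v2 where both store classes accept dimension_separator; the function name and example filename here are illustrative:

```python
import zarr
from pathlib import Path

def open_ome_zarr_store(path: str):
    """A SPIM.ome.zarr.zip archive must be opened with ZipStore; FSStore would
    treat the zip as one opaque file rather than a chunk hierarchy."""
    if Path(path).suffix == ".zip":
        return zarr.storage.ZipStore(path, dimension_separator="/", mode="r")
    return zarr.storage.FSStore(path, dimension_separator="/", mode="r")

# zi = zarr.open(store=open_ome_zarr_store("sub-01_SPIM.ome.zarr.zip"), mode="r")
```
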
From f60d1e087959acbfdb3b7a2d4ddb597e6fe15e1d Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Wed, 25 Sep 2024 15:39:36 -0400
Subject: [PATCH 8/9] use bigstitcher-spark exclusively instead of fiji

- can consider cleaning up repo and deps because of this, maybe once we
  test it out on more samples
- also fixes an ome_zarr_to_nii bug when dealing with ome.zarr.zip
---
 README.md                                   |  2 +-
 config/config.yml                           | 28 ++++---
 workflow/Snakefile                          |  2 +-
 workflow/rules/bigstitcher.smk              | 84 ++++++++++++-------
 workflow/rules/common.smk                   |  7 --
 workflow/rules/ome_zarr.smk                 | 15 ++--
 workflow/rules/qc.smk                       | 18 ++++-
 workflow/scripts/generate_volume_qc.py      |  8 +-
 workflow/scripts/generate_whole_slice_qc.py | 20 ++---
 workflow/scripts/ome_zarr_to_nii.py         |  7 --
 10 files changed, 109 insertions(+), 82 deletions(-)

diff --git a/README.md b/README.md
index 7ba1275..e5d5b8d 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ or for snakemake<8.0, use:
 snakemake -c all --use-singularity
 ```
 
-Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher:mem_mb=60000 fuse_dataset:mem_mb=100000`
+Note: if you run the workflow on a system with large memory, you will need to set the heap size for the stitching and fusion rules. This can be done with e.g.: `--set-resources bigstitcher_spark_stitching:mem_mb=60000 bigstitcher_spark_fusion:mem_mb=100000`
 
 7. If you want to run the workflow using a batch job submission server, please see the executor plugins here: https://snakemake.github.io/snakemake-plugin-catalog/
diff --git a/config/config.yml b/config/config.yml
index 6a5c684..7fa2b4c 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -6,7 +6,7 @@ work: 'work'
 
 remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container
 
-write_ome_zarr_direct: False #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk
+write_ome_zarr_direct: True #use this to skip writing the final zarr output to work first and copying afterwards -- useful when work is not a fast local disk
 
 cores_per_rule: 32
 
@@ -36,18 +36,18 @@ bigstitcher:
       downsample_in_x: 4
       downsample_in_y: 4
       downsample_in_z: 1
-    method: "phase_corr"
-    methods:
-      phase_corr: "Phase Correlation"
-      optical_flow: "Lucas-Kanade"
+    method: "phase_corr" #unused
+    methods: #unused
+      phase_corr: "Phase Correlation"
+      optical_flow: "Lucas-Kanade"
   filter_pairwise_shifts:
-    enabled: 1
-    min_r: 0.2
-    max_shift_total: 50
+    enabled: 1 #unused
+    min_r: 0.7
+    max_shift_total: 50
   global_optimization:
     enabled: 1
-    method: ONE_ROUND_SIMPLE
-    methods:
+    method: TWO_ROUND_ITERATIVE
+    methods: #unused, only for reference
       ONE_ROUND_SIMPLE: "One-Round"
       ONE_ROUND_ITERATIVE: "One-Round with iterative dropping of bad links"
       TWO_ROUND_SIMPLE: "Two-Round using metadata to align unconnected Tiles"
       TWO_ROUND_ITERATIVE: "Two-Round using Metadata to align unconnected Tiles and iterative dropping of bad links"
 
   fuse_dataset:
     downsampling: 1
     block_size_x: 256 # for storage
     block_size_y: 256
     block_size_z: 8
     block_size_factor_x: 1 #e.g. 2 will use 2*block_size for computation
     block_size_factor_y: 1
     block_size_factor_z: 32
 
 ome_zarr:
-  desc: sparkstitchedflatcorr
+  desc: sparkstitchedraw #flatcorr
   max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
   rechunk_size: #z, y, x
     - 1
@@ -100,7 +100,7 @@ ome_zarr:
     id: 0
     name: spim
   version: "0.4"
-use_zipstore: True #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr
+use_zipstore: False #if True, produce SPIM.ome.zarr.zip instead of SPIM.ome.zarr
 
 nifti:
   levels: #cannot be higher than max_downsampling_layers in ome_zarr
@@ -155,7 +155,5 @@ report:
 
 containers:
-  #spimprep: 'docker://khanlab/spimprep-deps:main'
-  spimprep: '/local/scratch/spimprep-deps_spark.sif'
-  # updated.sif'
+  spimprep: 'docker://khanlab/spimprep-deps:main'
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 14abdd6..cd19ab3 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -33,7 +33,7 @@ rule all:
     input:
         get_all_targets(),
         get_bids_toplevel_targets(),
-        # get_qc_targets(), #need to skip this if using prestitched
+        get_qc_targets(),  #need to skip this if using prestitched
     localrule: True
diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk
index ebd2d49..4256e3b 100644
--- a/workflow/rules/bigstitcher.smk
+++ b/workflow/rules/bigstitcher.smk
@@ -93,7 +93,7 @@ rule zarr_to_bdv:
     script:
         "../scripts/zarr_to_n5_bdv.py"
 
-"""
+
 rule bigstitcher:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
@@ -112,7 +112,7 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcherproc.sh",
+                suffix="bigstitcherfiji.sh",
             )
         ),
         dataset_xml=temp(
@@ -123,13 +123,13 @@ rule bigstitcher:
                 sample="{sample}",
                 acq="{acq}",
                 desc="{desc}",
-                suffix="bigstitcher.xml",
+                suffix="bigstitcherfiji.xml",
             )
         ),
     benchmark:
         bids(
             root="benchmarks",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -139,7 +139,7 @@ rule bigstitcher:
     log:
         bids(
             root="logs",
-            datatype="bigstitcherproc",
+            datatype="bigstitcherfiji",
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
@@ -159,19 +159,30 @@ rule bigstitcher:
         " {params.fiji_launcher_cmd} && "
         " echo ' -macro {input.ijm} \"{params.macro_args}\"' >> {output.launcher} "
         " && {output.launcher} |& tee {log} && {params.rm_old_xml}"
-"""
+
 
 rule bigstitcher_spark_stitching:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
         dataset_xml=rules.zarr_to_bdv.output.bdv_xml,
     params:
-        downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'],
-            dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'],
-            dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']),
-        min_r='--minR={min_r}'.format(min_r=config['bigstitcher']['filter_pairwise_shifts']['min_r']),
-        max_shift='--maxShiftTotal={max_shift}'.format(max_shift=config['bigstitcher']['filter_pairwise_shifts']['max_shift_total']),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        min_r="--minR={min_r}".format(
+            min_r=config["bigstitcher"]["filter_pairwise_shifts"]["min_r"]
+        ),
+        max_shift="--maxShiftTotal={max_shift}".format(
+            max_shift=config["bigstitcher"]["filter_pairwise_shifts"][
+                "max_shift_total"
+            ]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
     output:
         dataset_xml=temp(
             bids(
                 root=work,
@@ -215,7 +226,8 @@ rule bigstitcher_spark_stitching:
     shell:
         "cp {input.dataset_xml} {output.dataset_xml} && "
         "stitching {params.mem_gb} {threads} -x {output.dataset_xml} "
-        " {params.min_r} {params.downsampling} "
+        " {params.min_r} {params.downsampling} {params.max_shift} && "
+        "{params.rm_old_xml}"
 
 
 rule bigstitcher_spark_solver:
     input:
         dataset_n5=rules.zarr_to_bdv.output.bdv_n5,
         dataset_xml=rules.bigstitcher_spark_stitching.output.dataset_xml,
     params:
-        downsampling='--downsampling={dsx},{dsy},{dsz}'.format(dsx=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_x'],
-            dsy=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_y'],
-            dsz=config['bigstitcher']['calc_pairwise_shifts']['downsample_in_z']),
-        method='--method={method}'.format(method=config['bigstitcher']['global_optimization']['method']),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        downsampling="--downsampling={dsx},{dsy},{dsz}".format(
+            dsx=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_x"],
+            dsy=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_y"],
+            dsz=config["bigstitcher"]["calc_pairwise_shifts"]["downsample_in_z"],
+        ),
+        method="--method={method}".format(
+            method=config["bigstitcher"]["global_optimization"]["method"]
+        ),
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
+        rm_old_xml=lambda wildcards, output: f"rm -f {output.dataset_xml}~?",
     output:
         dataset_xml=temp(
             bids(
                 root=work,
@@ -284,10 +305,12 @@ rule bigstitcher_spark_solver:
     shell:
         "cp {input.dataset_xml} {output.dataset_xml} && "
         "solver {params.mem_gb} {threads} -x {output.dataset_xml} "
-        " -s STITCHING --lambda 0.1 " #lambda 0.1 is default (can expose this if needed)
-        " {params.method} "
+        " -s STITCHING --lambda 0.1 "
+        " {params.method} && "
+        "{params.rm_old_xml}"
+    #lambda 0.1 is default (can expose this if needed)
 
 
-rule fuse_dataset:
+rule bigstitcher_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -294,7 +314,11 @@ rule fuse_dataset:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'),
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
@@ -387,7 +407,7 @@ rule fuse_dataset:
 
 
-rule fuse_dataset_spark:
+rule bigstitcher_spark_fusion:
     input:
         dataset_n5=bids(
             root=work,
@@ -405,7 +425,11 @@ rule fuse_dataset_spark:
             sample="{sample}",
             acq="{acq}",
             desc="{desc}",
-            suffix="bigstitcher{}.xml".format('solver' if config['bigstitcher']['global_optimization']['enabled'] else 'stitching'),
+            suffix="bigstitcher{}.xml".format(
+                "solver"
+                if config["bigstitcher"]["global_optimization"]["enabled"]
+                else "stitching"
+            ),
         ),
         ijm=Path(workflow.basedir) / "macros" / "FuseImageMacroZarr.ijm",
     params:
@@ -426,7 +446,9 @@ rule fuse_dataset_spark:
             bsfy=config["bigstitcher"]["fuse_dataset"]["block_size_factor_y"],
             bsfz=config["bigstitcher"]["fuse_dataset"]["block_size_factor_z"],
         ),
-        mem_gb=lambda wildcards, resources: '{mem_gb}'.format(mem_gb=int(resources.mem_mb/1000))
+        mem_gb=lambda wildcards, resources: "{mem_gb}".format(
+            mem_gb=int(resources.mem_mb / 1000)
+        ),
     output:
         zarr=temp(
             directory(
@@ -440,7 +470,7 @@ rule fuse_dataset_spark:
         config["containers"]["spimprep"]
     resources:
         runtime=30,
-        mem_mb=40000,
+        mem_mb=30000,
     threads: config["cores_per_rule"]
     group:
         "preproc"
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 0248dd3..88d40e7 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -368,13 +368,6 @@ def get_output_ome_zarr(acq_type):
     }
 
 
-def get_output_ome_zarr_as_input(wildcards):
-    if 'blaze' in wildcards.acq:
-        return get_output_ome_zarr('blaze')
-    elif 'prestitched' in wildcards.acq:
-        return get_output_ome_zarr('prestitched')
-
-
 def get_storage_creds():
diff --git a/workflow/rules/ome_zarr.smk b/workflow/rules/ome_zarr.smk
index 77a58b3..0ea5854 100644
--- a/workflow/rules/ome_zarr.smk
+++ b/workflow/rules/ome_zarr.smk
@@ -117,14 +117,15 @@ rule ome_zarr_to_zipstore:
 
 rule ome_zarr_to_nii:
     input:
-        zarr=bids(
-            root=root,
-            subject="{subject}",
-            datatype="micr",
-            sample="{sample}",
-            acq="{acq}",
-            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr())),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
     params:
         channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain),
         uri=get_output_ome_zarr_uri(),
diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk
index a79289f..fc32d7b 100644
--- a/workflow/rules/qc.smk
+++ b/workflow/rules/qc.smk
@@ -73,8 +73,15 @@ rule generate_flatfield_qc:
 rule generate_whole_slice_qc:
     "Generates an html file to view whole slices from preprocessed images"
     input:
-        unpack(get_output_ome_zarr_as_input),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
         ws_html=config["report"]["resources"]["ws_html"],
     params:
         ws_s_start=config["report"]["whole_slice_viewer"]["slice_start"],
@@ -112,8 +119,15 @@ rule generate_volume_qc:
 rule generate_volume_qc:
     "Generates an html file to view the volume rendered image"
     input:
-        unpack(get_output_ome_zarr_as_input),
         **get_storage_creds(),
+        zarr=bids(
+            root=root,
+            subject="{subject}",
+            datatype="micr",
+            sample="{sample}",
+            acq="{acq}",
+            suffix="SPIM.{ext}".format(ext=get_extension_ome_zarr()),
+        ),
         vol_viewer_dir=config["report"]["resources"]["vol_viewer_dir"],
     params:
         uri=get_output_ome_zarr_uri(),
diff --git a/workflow/scripts/generate_volume_qc.py b/workflow/scripts/generate_volume_qc.py
index d5c48d1..12258ff 100644
--- a/workflow/scripts/generate_volume_qc.py
+++ b/workflow/scripts/generate_volume_qc.py
@@ -14,8 +14,6 @@
 # where html file should be written
 html_dest = snakemake.output.html
 
-# inputted ome-zarr path
-ome_data = snakemake.input.zarr
 
 # move volume renderer into the subjects directory
 copy_tree(snakemake.input.vol_viewer_dir, resource_dir)
@@ -28,7 +26,11 @@
 fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
 
 darr = da.from_zarr(store,component='/5')
 # Get most downsampled ome-zarr image
diff --git a/workflow/scripts/generate_whole_slice_qc.py b/workflow/scripts/generate_whole_slice_qc.py
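
Aside: after this patch the QC scripts read the OME-Zarr pyramid directly with dask instead of the ome_zarr Reader. The access pattern, sketched standalone (the path is illustrative; component "/5" is the most downsampled level, per max_downsampling_layers: 5 in the config):

```python
import dask.array as da
import zarr

store = zarr.storage.FSStore("sub-01_SPIM.ome.zarr", dimension_separator="/", mode="r")
darr = da.from_zarr(store, component="/5")  # smallest pyramid level, axes (c, z, y, x)

# Iterating a dask array yields slices along its first axis, i.e. one array per channel,
# matching the "for chan_num, channel in enumerate(darr):" loop in the patch.
for chan_num, channel in enumerate(darr):
    print(chan_num, channel.shape)
```
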
index f721128..ae072d8 100644
--- a/workflow/scripts/generate_whole_slice_qc.py
+++ b/workflow/scripts/generate_whole_slice_qc.py
@@ -1,7 +1,6 @@
 import os
 import math
-from ome_zarr.io import parse_url
-from ome_zarr.reader import Reader
+import dask.array as da
 import matplotlib.pyplot as plt
 import numpy as np
 from jinja2 import Environment, FileSystemLoader
@@ -19,14 +18,10 @@
 ws_s_start=snakemake.params.ws_s_start
 ws_cmap=snakemake.params.ws_cmap
 
-# input ome-zarr file
-ome= snakemake.input.zarr
-
 # output paths
 image_dir = snakemake.output.images_dir
 out_html = snakemake.output.html
 
-from ome_zarr.io import ZarrLocation
 uri = snakemake.params.uri
 
 if is_remote(uri):
@@ -35,18 +30,19 @@
 fs_args={}
 
 fs = get_fsspec(uri,**fs_args)
-store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
-zarrloc = ZarrLocation(store)
 
-# read ome-zarr data and convert to list
-proc_reader= Reader(zarrloc)
-proc_data=list(proc_reader())[0].data
+if Path(uri).suffix == '.zip':
+    store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
+else:
+    store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
+
+darr = da.from_zarr(store,component='/5')
 
 os.makedirs(image_dir, exist_ok=True)
 channels = []
 
 # for each channel add the images of the most downsampled data
-for chan_num, channel in enumerate(proc_data[-1]):
+for chan_num, channel in enumerate(darr):
     slice = ws_s_start
     chan = []
     slices = []
diff --git a/workflow/scripts/ome_zarr_to_nii.py b/workflow/scripts/ome_zarr_to_nii.py
index 720f9b4..728e1c8 100644
--- a/workflow/scripts/ome_zarr_to_nii.py
+++ b/workflow/scripts/ome_zarr_to_nii.py
@@ -19,18 +19,11 @@
 
 fs = get_fsspec(uri,**fs_args)
 
-print(fs)
-print(uri)
-print(Path(uri))
-print(Path(uri).path)
-
-print(Path(uri).suffix)
 if Path(uri).suffix == '.zip':
     store = zarr.storage.ZipStore(Path(uri).path,dimension_separator='/',mode='r')
 else:
     store = zarr.storage.FSStore(Path(uri).path,fs=fs,dimension_separator='/',mode='r')
 
-print(store)
 
 zi = zarr.open(store=store,mode='r')

From f6fe321edffcd07b7a6d949e2ef2756a4ebbae0e Mon Sep 17 00:00:00 2001
From: Ali Khan
Date: Wed, 25 Sep 2024 15:50:06 -0400
Subject: [PATCH 9/9] fix

---
 config/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.yml b/config/config.yml
index 7fa2b4c..edc16ba 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -64,7 +64,7 @@ bigstitcher:
     block_size_factor_z: 32
 
 ome_zarr:
-  desc: sparkstitchedraw #flatcorr
+  desc: sparkstitchedflatcorr
   max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16}
   rechunk_size: #z, y, x
     - 1
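
Aside: a quick way to sanity-check the end result of the series — the fused output should be uint16 (patches 2, 3, and 5) with the chunking the config converged on (patches 1, 4, and 7). Illustrative only; the zarr path here is made up, but the "/fused/s0" component comes from the affine-fusion command above:

```python
import dask.array as da

# Path is hypothetical; substitute the actual fused output from the work directory.
fused = da.from_zarr("work/sub-01/micr/fused.zarr", component="/fused/s0")
assert fused.dtype == "uint16", fused.dtype
print(fused.chunks)  # should reflect block_size_x/y/z from config/config.yml
```
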