From 1b1afc4a4cbdf79daf2b2f244fdab95207ca0847 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Tue, 3 Sep 2024 14:01:04 -0400 Subject: [PATCH] set chunking in bdv n5 (default 32x32x32) To hopefully address performance issues in the bigstitcher step when dealing with large datasets. Also sets the dtype to int16 instead of uint16 for the bdv n5 output only -- some image.sc forum posts describing the black bars suggested this as a fix. --- workflow/rules/bigstitcher.smk | 2 +- workflow/scripts/zarr_to_n5_bdv.py | 7 ++++--- workflow/scripts/zarr_to_ome_zarr.py | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/workflow/rules/bigstitcher.smk b/workflow/rules/bigstitcher.smk index 7bee8fc..b0a3ea7 100644 --- a/workflow/rules/bigstitcher.smk +++ b/workflow/rules/bigstitcher.smk @@ -40,7 +40,7 @@ rule zarr_to_bdv: ) / "dataset.xml" ), - chunks=(1,1,32,32,32) + chunks=(32,32,32) #the previous default was 1 x Nx x Ny (Nx Ny were full tile size!) output: bdv_n5=temp( directory( diff --git a/workflow/scripts/zarr_to_n5_bdv.py b/workflow/scripts/zarr_to_n5_bdv.py index 9d4f7ff..4976625 100644 --- a/workflow/scripts/zarr_to_n5_bdv.py +++ b/workflow/scripts/zarr_to_n5_bdv.py @@ -34,7 +34,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5): max_downsampling_layers=snakemake.params.max_downsampling_layers #load data (tiles,chans,zslices,x,y) -darr = da.from_zarr(in_zarr,chunks=snakemake.params.chunks).astype(np.int16) +darr = da.from_zarr(in_zarr) (n_tiles,n_chans,n_z,n_x,n_y) = darr.shape @@ -52,7 +52,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5): overwrite=True, nchannels=len(metadata['channels']), ntiles=len(metadata['tiles_x'])*len(metadata['tiles_y']), - blockdim=(snakemake.params.chunks[2:])) + blockdim=((1,256,256),)) bdv_writer.set_attribute_labels('channel', metadata['channels']) @@ -90,6 +90,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5): print('removing empty bdv h5/xml') rmtree(temp_bdv_dir) + print('writing data to n5') n5_store = zarr.n5.N5Store(snakemake.output.bdv_n5) @@ 
-98,7 +99,7 @@ def update_xml_h5_to_n5(in_xml,out_xml,in_n5): ds_list=[] #for setup-level attrs for ds in range(max_downsampling_layers): step=2**ds #1,2,4,8.. - zstack = da.squeeze(darr[tile_i,chan_i,:,::step,::step]) + zstack = da.squeeze(darr[tile_i,chan_i,:,::step,::step]).rechunk(snakemake.params.chunks).astype(np.int16) print(f'writing to setup{setup_i}/timepoint0/s{ds}') with ProgressBar(): zstack.to_zarr(n5_store,component=f'setup{setup_i}/timepoint0/s{ds}',overwrite=True,compute=True) diff --git a/workflow/scripts/zarr_to_ome_zarr.py b/workflow/scripts/zarr_to_ome_zarr.py index a0a3567..47dc718 100644 --- a/workflow/scripts/zarr_to_ome_zarr.py +++ b/workflow/scripts/zarr_to_ome_zarr.py @@ -68,6 +68,7 @@ darr_list.append(da.from_zarr(in_zarr,component=f'{group_name}/s0',chunks=rechunk_size)) + #append to omero metadata channel_metadata={key:val for key,val in snakemake.config['ome_zarr']['omero_metadata']['channels']['defaults'].items()} channel_name=stains[zarr_i]