From fc8fd1f8ef1811b81844fd40605652a4af32e7e3 Mon Sep 17 00:00:00 2001 From: Ali Khan Date: Sat, 17 Feb 2024 23:59:00 -0500 Subject: [PATCH] Dimension separator fix, other refactoring (#3) * fix to ome-zarr, and refactoring output - use dimension separator of / for zarr (makes ome-zarr vieweable now!) - output single zarr with channels combined - put nifti resampled in derivatives --- config/config.yml | 38 +++++++++++++---- workflow/Snakefile | 8 +++- workflow/rules/bids.smk | 26 ++++++++++++ workflow/rules/common.smk | 30 +------------ workflow/rules/ome_zarr.smk | 42 +++++++++---------- .../apply_basic_flatfield_corr_zarr.py | 2 +- workflow/scripts/ome_zarr_to_nii.py | 11 ++--- workflow/scripts/resample_labels_to_zarr.py | 2 +- workflow/scripts/tif_to_zarr.py | 2 +- workflow/scripts/zarr_to_ome_zarr.py | 31 ++++++++------ workflow/scripts/zarr_to_ome_zarr_labels.py | 1 + 11 files changed, 112 insertions(+), 81 deletions(-) create mode 100644 workflow/rules/bids.smk diff --git a/config/config.yml b/config/config.yml index 157004a..8da69d0 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,13 +1,9 @@ datasets: 'config/datasets.tsv' -root: 'results' +root: 'bids' work: 'work' -targets: - desc: - - stitchedflatcorr #e.g. can be stitchedraw to skip flatfield correction - import: raw_tif_pattern: "{prefix}_Blaze[{tilex} x {tiley}]_C{channel}_xyz-Table Z{zslice}.ome.tif" intensity_rescaling: 0.5 #raw images seem to be at the upper end of uint16 (over-saturated) -- causes wrapping issues when adjusting with flatfield correction etc. this rescales the raw data as it imports it.. @@ -40,7 +36,8 @@ bigstitcher: block_size_factor_z: 1 ome_zarr: - max_downsampling_layers: 4 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16} + desc: stitchedflatcorr + max_downsampling_layers: 5 # e.g. 4 levels: { 0: orig, 1: ds2, 2: ds4, 3: ds8, 4: ds16} rechunk_size: #z, y, x - 1 - 1024 @@ -52,6 +49,33 @@ nifti: - 3 - 4 + +bids: + raw: + Name: Name of the dataset + BIDSVersion: v1.9.0 + DatasetType: raw + License: The license for the dataset + Authors: + - Author Name 1 + - Author Name 2 + GeneratedBy: + - Name: SPIMprep + - Version: 0.1.0 + - CodeURL: https://github.com/khanlab/SPIMprep + resampled: + Name: Downsampled SPIM niftis + BIDSVersion: v1.9.0 + DatasetType: derived + Authors: + - Author Name 1 + - Author Name 2 + GeneratedBy: + - Name: SPIMprep + - Version: 0.1.0 + - CodeURL: https://github.com/khanlab/SPIMprep + + templates: - ABAv3 @@ -72,5 +96,3 @@ containers: spimprep: 'docker://khanlab/spimprep-deps:main' miracl: 'docker://mgoubran/miracl:2.2.5' -conda_envs: - global: 'envs/global.yml' diff --git a/workflow/Snakefile b/workflow/Snakefile index 57cf784..f8f3a24 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -14,9 +14,10 @@ container: config["containers"]["spimprep"] # use expandvars so we can use e.g. '$SLURM_TMPDIR' root = os.path.expandvars(config["root"]) work = os.path.expandvars(config["work"]) +resampled = Path(root) / "derivatives" / "resampled" # this is needed to use the latest bids spec with the pre-release snakebids -set_bids_spec("v0_10_1") +set_bids_spec("v0_11_0") # read datasets tsv datasets = pd.read_csv( @@ -39,10 +40,13 @@ include: "rules/common.smk" rule all: input: get_all_targets(), + Path(root) / "dataset_description.json", + Path(resampled) / "dataset_description.json", include: "rules/import.smk" include: "rules/flatfield_corr.smk" include: "rules/bigstitcher.smk" include: "rules/ome_zarr.smk" -include: "rules/atlasreg.smk" +include: "rules/bids.smk" + diff --git a/workflow/rules/bids.smk b/workflow/rules/bids.smk new file mode 100644 index 0000000..585162d --- /dev/null +++ b/workflow/rules/bids.smk @@ -0,0 +1,26 @@ +rule raw_dataset_desc: + params: + dd=config["bids"]["raw"], + output: + json=Path(root) / "dataset_description.json", + log: + 'logs/dd_raw.log' + run: + import json + + with open(output.json, "w") as outfile: + json.dump(params.dd, outfile, indent=4) + + +rule resampled_dataset_desc: + params: + dd=config["bids"]["resampled"], + output: + json=Path(resampled) / "dataset_description.json", + log: + 'logs/dd_raw.log' + run: + import json + + with open(output.json, "w") as outfile: + json.dump(params.dd, outfile, indent=4) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 08623ba..75df2db 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -10,54 +10,28 @@ def get_all_targets(): datatype="micr", sample="{sample}", acq="{acq}", - desc="{desc}", - stain="{stain}", suffix="spim.ome.zarr.zip", ), subject=datasets.loc[i, "subject"], sample=datasets.loc[i, "sample"], acq=datasets.loc[i, "acq"], - desc=config["targets"]["desc"], - stain=[datasets.loc[i, "stain_0"], datasets.loc[i, "stain_1"]], - ) - ) - targets.extend( - expand( - bids( - root=root, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - from_="{template}", - suffix="dseg.ome.zarr.zip", - ), - subject=datasets.loc[i, "subject"], - sample=datasets.loc[i, "sample"], - acq=datasets.loc[i, "acq"], - desc=config["targets"]["desc"], - template=config["templates"], - stain=[datasets.loc[i, "stain_0"], datasets.loc[i, "stain_1"]], ) ) targets.extend( expand( bids( - root=root, + root=resampled, subject="{subject}", datatype="micr", sample="{sample}", acq="{acq}", - desc="{desc}", + res="{level}x", stain="{stain}", - level="{level}", suffix="spim.nii", ), subject=datasets.loc[i, "subject"], sample=datasets.loc[i, "sample"], acq=datasets.loc[i, "acq"], - desc=config["targets"]["desc"], level=config["nifti"]["levels"], stain=[datasets.loc[i, "stain_0"], datasets.loc[i, "stain_1"]], ) diff --git a/workflow/rules/ome_zarr.smk b/workflow/rules/ome_zarr.smk index b403eb3..71ae8ea 100644 --- a/workflow/rules/ome_zarr.smk +++ b/workflow/rules/ome_zarr.smk @@ -1,14 +1,19 @@ rule zarr_to_ome_zarr: input: - zarr=bids( - root=work, - subject="{subject}", - datatype="micr", - sample="{sample}", - acq="{acq}", - desc="{desc}", - stain="{stain}", - suffix="spim.zarr", + zarr=lambda wildcards: expand( + bids( + root=work, + subject="{subject}", + datatype="micr", + sample="{sample}", + acq="{acq}", + desc="{desc}", + stain="{stain}", + suffix="spim.zarr", + ), + stain=get_stains(wildcards), + desc=config["ome_zarr"]["desc"], + allow_missing=True, ), metadata_json=rules.raw_to_metadata.output.metadata_json, params: @@ -25,8 +30,6 @@ rule zarr_to_ome_zarr: datatype="micr", sample="{sample}", acq="{acq}", - desc="{desc}", - stain="{stain}", suffix="spim.ome.zarr", ) ) @@ -39,8 +42,6 @@ rule zarr_to_ome_zarr: datatype="zarr_to_ome_zarr", sample="{sample}", acq="{acq}", - desc="{desc}", - stain="{stain}", suffix="log.txt", ), container: @@ -73,20 +74,19 @@ rule ome_zarr_to_nii: datatype="micr", sample="{sample}", acq="{acq}", - desc="{desc}", - stain="{stain}", suffix="spim.ome.zarr.zip", ), + params: + channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain), output: nii=bids( - root=root, + root=resampled, subject="{subject}", datatype="micr", sample="{sample}", acq="{acq}", - desc="{desc}", + res="{level}x", stain="{stain}", - level="{level}", suffix="spim.nii", ), benchmark: @@ -96,9 +96,8 @@ rule ome_zarr_to_nii: subject="{subject}", sample="{sample}", acq="{acq}", - desc="{desc}", + res="{level}x", stain="{stain}", - level="{level}", suffix="benchmark.tsv", ) log: @@ -108,9 +107,8 @@ rule ome_zarr_to_nii: subject="{subject}", sample="{sample}", acq="{acq}", - desc="{desc}", + res="{level}x", stain="{stain}", - level="{level}", suffix="log.txt", ), group: diff --git a/workflow/scripts/apply_basic_flatfield_corr_zarr.py b/workflow/scripts/apply_basic_flatfield_corr_zarr.py index 73b2a7d..2064a97 100644 --- a/workflow/scripts/apply_basic_flatfield_corr_zarr.py +++ b/workflow/scripts/apply_basic_flatfield_corr_zarr.py @@ -50,4 +50,4 @@ def apply_basic_parallel(x): arr_stacked = da.stack(chan_arr_list,axis=1) with ProgressBar(): - da.to_zarr(arr_stacked,snakemake.output.zarr,overwrite=True) + da.to_zarr(arr_stacked,snakemake.output.zarr,overwrite=True,dimension_separator='/') diff --git a/workflow/scripts/ome_zarr_to_nii.py b/workflow/scripts/ome_zarr_to_nii.py index 5217330..db53129 100644 --- a/workflow/scripts/ome_zarr_to_nii.py +++ b/workflow/scripts/ome_zarr_to_nii.py @@ -7,6 +7,7 @@ from ome_zarr.reader import Reader in_zarr = snakemake.input.zarr +channel_index = snakemake.params.channel_index zi = zarr.open(in_zarr) @@ -21,11 +22,12 @@ #zarr uses z,y,x ordering, we reverse this for nifti # also flip to set orientation properly affine = np.eye(4) -affine[0,0]=-transforms[0]['scale'][2] #z -affine[1,1]=-transforms[0]['scale'][1] #y -affine[2,2]=-transforms[0]['scale'][0] #x +affine[0,0]=-transforms[0]['scale'][3] #x +affine[1,1]=-transforms[0]['scale'][2] #y +affine[2,2]=-transforms[0]['scale'][1] #z -darr = da.from_zarr(in_zarr,component=f'/{level}') +#grab the channel index corresponding to the stain +darr = da.from_zarr(in_zarr,component=f'/{level}')[channel_index,:,:,:].squeeze() #input array axes are ZYX #writing to nifti we want XYZ @@ -35,5 +37,4 @@ affine=affine ) - nii.to_filename(snakemake.output.nii) diff --git a/workflow/scripts/resample_labels_to_zarr.py b/workflow/scripts/resample_labels_to_zarr.py index d91a193..060a02e 100644 --- a/workflow/scripts/resample_labels_to_zarr.py +++ b/workflow/scripts/resample_labels_to_zarr.py @@ -94,6 +94,6 @@ def interp_label(x,block_info=None): darr_map=darr.map_blocks(interp_label, dtype=np.uint16) with ProgressBar(): - da.to_zarr(darr_map,out_zarr,overwrite=True) + da.to_zarr(darr_map,out_zarr,overwrite=True,dimension_separator='/') diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index 4f16384..bfd4a86 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -64,6 +64,6 @@ def single_imread(*args): #now we can do the computation itself, storing to zarr print('writing images to zarr with dask') with ProgressBar(): - da.to_zarr(darr,snakemake.output.zarr,overwrite=True) + da.to_zarr(darr,snakemake.output.zarr,overwrite=True,dimension_separator='/') diff --git a/workflow/scripts/zarr_to_ome_zarr.py b/workflow/scripts/zarr_to_ome_zarr.py index 0aed302..f7584df 100644 --- a/workflow/scripts/zarr_to_ome_zarr.py +++ b/workflow/scripts/zarr_to_ome_zarr.py @@ -7,6 +7,7 @@ from dask.diagnostics import ProgressBar in_zarr=snakemake.input.zarr + metadata_json=snakemake.input.metadata_json downsampling=snakemake.params.downsampling max_layer=snakemake.params.max_downsampling_layers #number of downsamplings by 2 to include in zarr @@ -14,14 +15,6 @@ out_zarr=snakemake.output.zarr scaling_method=snakemake.params.scaling_method -#open zarr to get group name -zi = zarr.open(in_zarr) -group_name = [g for g in zi.group_keys()][0] - -darr = da.from_zarr(in_zarr,component=f'{group_name}/s0',chunks=rechunk_size) -darr - - # prepare metadata for ome-zarr with open(metadata_json) as fp: metadata = json.load(fp) @@ -37,12 +30,25 @@ for l in range(max_layer+1): - coordinate_transformations.append( [{'scale': [voxdim[0],(2**l)*voxdim[1],(2**l)*voxdim[2]], #image-pyramids in XY only + coordinate_transformations.append( [{'scale': [1,voxdim[0],(2**l)*voxdim[1],(2**l)*voxdim[2]], #image-pyramids in XY only 'type': 'scale'}]) -axes = [ {'name': ax, 'type': 'space', 'unit': 'micrometer'} for ax in ['z','y','x'] ] +axes = [{'name': 'c', 'type': 'channel'}] + [{'name': ax, 'type': 'space', 'unit': 'micrometer'} for ax in ['z','y','x'] ] + + +darr_list=[] +for zarr_i in range(len(snakemake.input.zarr)): + + #open zarr to get group name + in_zarr=snakemake.input.zarr[zarr_i] + zi = zarr.open(in_zarr) + group_name = [g for g in zi.group_keys()][0] + + darr_list.append(da.from_zarr(in_zarr,component=f'{group_name}/s0',chunks=rechunk_size)) + +darr_channels = da.stack(darr_list) store = zarr.DirectoryStore(out_zarr) @@ -52,12 +58,11 @@ with ProgressBar(): - write_image(image=darr, + write_image(image=darr_channels, group=root, scaler=scaler, coordinate_transformations=coordinate_transformations, - #fmt=format_from_version(0.1), -# fmt=format_from_version(0.4), + storage_options={'dimension_separator': '/'}, axes=axes) diff --git a/workflow/scripts/zarr_to_ome_zarr_labels.py b/workflow/scripts/zarr_to_ome_zarr_labels.py index 7660f7d..2e425e5 100644 --- a/workflow/scripts/zarr_to_ome_zarr_labels.py +++ b/workflow/scripts/zarr_to_ome_zarr_labels.py @@ -67,6 +67,7 @@ group=root, scaler=scaler, name=label_name, + storage_options={'dimension_separator': '/'}, coordinate_transformations=coordinate_transformations, axes=axes)