diff --git a/workflow/scripts/blaze_to_metadata.py b/workflow/scripts/blaze_to_metadata.py index 196dac5..6eb88cd 100644 --- a/workflow/scripts/blaze_to_metadata.py +++ b/workflow/scripts/blaze_to_metadata.py @@ -16,10 +16,19 @@ else: is_zstack=True -if is_zstack: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) +#now check to see if there is just single tile +if ' x ' in Path(tif_files[0]).name: + is_tiled=True else: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) + is_tiled=False + +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) +else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) #add a wildcard constraint to ensure no #subfolders get parsed (ie don't match anything with / in it): @@ -28,24 +37,34 @@ #parse the filenames to get number of channels, tiles etc.. -if is_zstack: - prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files) +if is_tiled: + if is_zstack: + prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern_constrained,tif_files) + else: + prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) else: - prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) + prefix, channel, zslice = glob_wildcards(in_tif_pattern_constrained,tif_files) + +if is_tiled: + tiles_x = sorted(list(set(tilex))) + tiles_y = sorted(list(set(tiley))) -tiles_x = sorted(list(set(tilex))) -tiles_y = sorted(list(set(tiley))) channels = sorted(list(set(channel))) prefixes = sorted(list(set(prefix))) + if not is_zstack: zslices = sorted(list(set(zslice))) - #read in series metadata from first file - in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) - +if is_tiled: + if is_zstack: + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) + else: + #read in series metadata from first file + in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) else: - in_tif = in_tif_pattern.format(tilex=tiles_x[0],tiley=tiles_y[0],prefix=prefixes[0],channel=channels[0]) + in_tif = in_tif_pattern.format(prefix=prefixes[0],channel=channels[0],zslice=zslices[0]) + raw_tif = tifffile.TiffFile(in_tif,mode='r') @@ -62,68 +81,79 @@ #read tile configuration from the microscope metadata if axes == 'CZYX': - if is_zstack: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" else: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+),(?P\S+), (?P\S+)\)" + print('single tile, axes=CZYX') + elif axes == 'ZYX': - if is_zstack: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + if is_tiled: + if is_zstack: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+)\)" + else: + tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" else: - tile_config_pattern=r"Blaze\[(?P[0-9]+) x (?P[0-9]+)\]_C(?P[0-9]+)_xyz-Table Z(?P[0-9]+).ome.tif;;\((?P\S+), (?P\S+), (?P\S+)\)" - - -tile_pattern = re.compile(tile_config_pattern) - -#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice -map_x=dict() -map_y=dict() -map_z=dict() - -map_tiles_to_chunk=dict() -chunks = [] -for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): - map_tiles_to_chunk[tilex+tiley] = chunk - chunks.append(chunk) - -for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: - d = re.search(tile_pattern,line).groupdict() - chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first - - if is_zstack: - key = f"tile-{chunk}_chan-{d['channel']}_z-0" - else: - #key is: tile-{chunk}_chan-{channel}_z-{zslice} - key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" - - map_x[key] = float(d['x']) - map_y[key] = float(d['y']) - if is_zstack: - map_z[key] = float(0) - else: - map_z[key] = float(d['z']) - + print('single tile, axes=ZYX') + +if is_tiled: + tile_pattern = re.compile(tile_config_pattern) + + #put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice + map_x=dict() + map_y=dict() + map_z=dict() + + map_tiles_to_chunk=dict() + chunks = [] + for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)): + map_tiles_to_chunk[tilex+tiley] = chunk + chunks.append(chunk) + + for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]: + d = re.search(tile_pattern,line).groupdict() + chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first + + if is_zstack: + key = f"tile-{chunk}_chan-{d['channel']}_z-0" + else: + #key is: tile-{chunk}_chan-{channel}_z-{zslice} + key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}" + + map_x[key] = float(d['x']) + map_y[key] = float(d['y']) + if is_zstack: + map_z[key] = float(0) + else: + map_z[key] = float(d['z']) + metadata={} -metadata['tiles_x'] = tiles_x -metadata['tiles_y'] = tiles_y +if is_tiled: + metadata['tiles_x'] = tiles_x + metadata['tiles_y'] = tiles_y + metadata['chunks'] = chunks + metadata['lookup_tile_offset_x'] = map_x + metadata['lookup_tile_offset_y'] = map_y + metadata['lookup_tile_offset_z'] = map_z + metadata['channels'] = channels + if not is_zstack: metadata['zslices'] = zslices metadata['prefixes'] = prefixes -metadata['chunks'] = chunks metadata['axes'] = axes metadata['shape'] = shape metadata['physical_size_x'] = float(physical_size_x) metadata['physical_size_y'] = float(physical_size_y) metadata['physical_size_z'] = float(physical_size_z) -metadata['lookup_tile_offset_x'] = map_x -metadata['lookup_tile_offset_y'] = map_y -metadata['lookup_tile_offset_z'] = map_z metadata['ome_full_metadata'] = ome_dict metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX metadata['PixelSizeUnits'] = 'mm' metadata['is_zstack'] = is_zstack +metadata['is_tiled'] = is_tiled #write metadata to json with open(snakemake.output.metadata_json, 'w') as fp: diff --git a/workflow/scripts/tif_to_zarr.py b/workflow/scripts/tif_to_zarr.py index 4d61196..034a9cb 100644 --- a/workflow/scripts/tif_to_zarr.py +++ b/workflow/scripts/tif_to_zarr.py @@ -36,11 +36,16 @@ def read_page_as_numpy(tif_file,page): is_zstack = metadata['is_zstack'] +is_tiled = metadata['is_tiled'] -if is_zstack: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) +if is_tiled: + if is_zstack: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]) + else: + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) else: - in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern"]) + in_tif_pattern = os.path.join(snakemake.input.ome_dir,snakemake.config["import_blaze"]["raw_tif_pattern_notile"]) + #use tif pattern but replace the [ and ] with [[] and []] so glob doesn't choke @@ -52,39 +57,58 @@ def read_page_as_numpy(tif_file,page): size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']) size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']) size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']) -size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) -#now get the first channel and first zslice tif -tiles=[] -for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): - print(f'tile {tilex}x{tiley}, {i_tile}') + + + +if is_tiled: + + size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y']) + + #now get the first channel and first zslice tif + tiles=[] + for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])): + print(f'tile {tilex}x{tiley}, {i_tile}') + zstacks=[] + for i_chan,channel in enumerate(metadata['channels']): + + print(f'channel {i_chan}') + + if is_zstack: + tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) + + pages=[] + #read each page + for i_z in range(size_z): + pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) + + zstacks.append(da.stack(pages)) + print(zstacks[-1].shape) + else: + zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) + + #have list of zstack dask arrays for the tile, one for each channel + #stack them up and append to list of tiles + tiles.append(da.stack(zstacks)) + + + + #now we have list of tiles, each a dask array + #stack them up to get our final array + darr = da.stack(tiles) + +else: + #single tile, zslices: zstacks=[] for i_chan,channel in enumerate(metadata['channels']): print(f'channel {i_chan}') + zstacks.append(imread_tifs(in_tif_glob.format(prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - if is_zstack: - tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel) - - pages=[] - #read each page - for i_z in range(size_z): - pages.append(da.from_delayed(delayed(read_page_as_numpy)(tif_file,i_z),shape=(size_y,size_x),dtype='uint16')) - - zstacks.append(da.stack(pages)) - print(zstacks[-1].shape) - else: - zstacks.append(imread_tifs(in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*'), imread=single_imread)) - - #have list of zstack dask arrays for the tile, one for each channel - #stack them up and append to list of tiles - tiles.append(da.stack(zstacks)) - + #stack the channels up + darr = da.stack(zstacks) -#now we have list of tiles, each a dask array -#stack them up to get our final array -darr = da.stack(tiles) print(darr.shape) print(darr.chunksize)