Skip to content

Commit

Permalink
update the tif_to_zarr_gcs for tifzstacks on google cloud
Browse files Browse the repository at this point in the history
  • Loading branch information
akhanf committed Oct 1, 2024
1 parent 4d1b90e commit a2ce218
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 91 deletions.
139 changes: 84 additions & 55 deletions workflow/scripts/blaze_to_metadata_gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,44 @@
else:
is_zstack=True

if is_zstack:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
#now check to see if there is just single tile
if ' x ' in Path(tifs[0]).name:
is_tiled=True
else:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
is_tiled=False



if is_tiled:
if is_zstack:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
else:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
else:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"]



#parse the filenames to get number of channels, tiles etc..
if is_zstack:
prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs)
if is_tiled:
if is_zstack:
prefix, tilex, tiley, channel = glob_wildcards(in_tif_pattern,files=tifs)
else:
prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)
else:
prefix, tilex, tiley, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)
zslices = sorted(list(set(zslice)))
prefix, channel, zslice = glob_wildcards(in_tif_pattern,files=tifs)

if is_tiled:
tiles_x = sorted(list(set(tilex)))
tiles_y = sorted(list(set(tiley)))

tiles_x = sorted(list(set(tilex)))
tiles_y = sorted(list(set(tiley)))
channels = sorted(list(set(channel)))
prefixes = sorted(list(set(prefix)))

if not is_zstack:
zslices = sorted(list(set(zslice)))


if not is_zstack:
zslices = sorted(list(set(zslice)))

Expand All @@ -68,70 +87,80 @@

#read tile configuration from the microscope metadata
if axes == 'CZYX':
if is_zstack:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
else:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
elif axes == 'ZYX':
if is_zstack:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
else:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"

tile_pattern = re.compile(tile_config_pattern)

#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice
map_x=dict()
map_y=dict()
map_z=dict()

map_tiles_to_chunk=dict()
chunks = []
for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)):
map_tiles_to_chunk[tilex+tiley] = chunk
chunks.append(chunk)

for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]:

d = re.search(tile_pattern,line).groupdict()
chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first

if is_zstack:
key = f"tile-{chunk}_chan-{d['channel']}_z-0000"
if is_tiled:
if is_zstack:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+)\)"
else:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+),(?P<chan>\S+), (?P<z>\S+)\)"
else:
#key is: tile-{chunk}_chan-{channel}_z-{zslice}
key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}"
print('single tile, axes=CZYX')

map_x[key] = float(d['x'])
map_y[key] = float(d['y'])
if is_zstack:
map_z[key] = float(0)
elif axes == 'ZYX':
if is_tiled:
if is_zstack:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+)\)"
else:
tile_config_pattern=r"Blaze\[(?P<tilex>[0-9]+) x (?P<tiley>[0-9]+)\]_C(?P<channel>[0-9]+)_xyz-Table Z(?P<zslice>[0-9]+).ome.tif;;\((?P<x>\S+), (?P<y>\S+), (?P<z>\S+)\)"
else:
map_z[key] = float(d['z'])



print('single tile, axes=ZYX')

if is_tiled:
tile_pattern = re.compile(tile_config_pattern)

#put it in 3 maps, one for each coord, indexed by tilex, tiley, channel, and aslice
map_x=dict()
map_y=dict()
map_z=dict()

map_tiles_to_chunk=dict()
chunks = []
for chunk,(tilex,tiley) in enumerate(product(tiles_x,tiles_y)):
map_tiles_to_chunk[tilex+tiley] = chunk
chunks.append(chunk)

for line in custom_metadata['TileConfiguration']['@TileConfiguration'].split(' ')[1:]:

d = re.search(tile_pattern,line).groupdict()
chunk = map_tiles_to_chunk[d['tilex']+d['tiley']] # want the key to have chunk instad of tilex,tiley, so map to that first

if is_zstack:
key = f"tile-{chunk}_chan-{d['channel']}_z-0000"
else:
#key is: tile-{chunk}_chan-{channel}_z-{zslice}
key = f"tile-{chunk}_chan-{d['channel']}_z-{d['zslice']}"

map_x[key] = float(d['x'])
map_y[key] = float(d['y'])
if is_zstack:
map_z[key] = float(0)
else:
map_z[key] = float(d['z'])


metadata={}
metadata['tiles_x'] = tiles_x
metadata['tiles_y'] = tiles_y
if is_tiled:
metadata['tiles_x'] = tiles_x
metadata['tiles_y'] = tiles_y
metadata['chunks'] = chunks
metadata['lookup_tile_offset_x'] = map_x
metadata['lookup_tile_offset_y'] = map_y
metadata['lookup_tile_offset_z'] = map_z

metadata['channels'] = channels

if not is_zstack:
metadata['zslices'] = zslices
metadata['prefixes'] = prefixes
metadata['chunks'] = chunks
metadata['axes'] = axes
metadata['shape'] = shape
metadata['physical_size_x'] = float(physical_size_x)
metadata['physical_size_y'] = float(physical_size_y)
metadata['physical_size_z'] = float(physical_size_z)
metadata['lookup_tile_offset_x'] = map_x
metadata['lookup_tile_offset_y'] = map_y
metadata['lookup_tile_offset_z'] = map_z
metadata['ome_full_metadata'] = ome_dict
metadata['PixelSize'] = [ metadata['physical_size_z']/1000.0, metadata['physical_size_y']/1000.0, metadata['physical_size_x']/1000.0 ] #zyx since OME-Zarr is ZYX
metadata['PixelSizeUnits'] = 'mm'
metadata['is_zstack'] = is_zstack
metadata['is_tiled'] = is_tiled

#write metadata to json
with open(snakemake.output.metadata_json, 'w') as fp:
Expand Down
99 changes: 63 additions & 36 deletions workflow/scripts/tif_to_zarr_gcs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import tifffile
import json
import dask.array as da
from dask.delayed import delayed
import dask.array.image
from itertools import product
from dask.diagnostics import ProgressBar
Expand All @@ -21,14 +22,6 @@ def replace_square_brackets(pattern):
pattern = pattern.replace('##RIGHTBRACKET##','[]]')
return pattern

def get_zstack_metadata(gcs_uri,fs):
with fs.open(gcs_uri, 'rb') as file:
tif = tifffile.TiffFile(file)
pages = tif.pages
n_z = len(pages)
sample = tif.pages[0].asarray()
return {'n_z': n_z, 'shape': sample.shape, 'dtype': sample.dtype}


def get_tiff_num_pages(fs,gcs_uri):
with fs.open(gcs_uri, 'rb') as file:
Expand All @@ -39,6 +32,19 @@ def read_tiff_slice(fs,gcs_uri, key=0):
with fs.open(gcs_uri, 'rb') as file:
return tifffile.imread(file, key=key)

def read_page_as_numpy(tif_file_uri, page, fs=None):
"""Gets a single page (i.e., 2D image) from a tif file z-stack stored in a cloud URI."""

# Open the file from the cloud storage using fsspec
with fs.open(tif_file_uri, 'rb') as f:
# Read the file content into a buffer
file_buffer = f.read()

# Use pyvips to read from the buffer
return pyvips.Image.new_from_buffer(file_buffer, "", page=page).numpy()



def build_zstack(gcs_uris,fs):
"""Build a z-stack from a list of GCS URIs."""
lazy_arrays = [
Expand Down Expand Up @@ -70,11 +76,16 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):


is_zstack = metadata['is_zstack']
is_tiled = metadata['is_tiled']

if is_zstack:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
if is_tiled:
if is_zstack:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_zstack"]
else:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
else:
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern"]
in_tif_pattern = snakemake.config["import_blaze"]["raw_tif_pattern_notile"]




Expand All @@ -84,39 +95,55 @@ def build_zstack_from_single(gcs_uri,zstack_metadata,fs):


#TODO: put these in top-level metadata for easier access..
size_x=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX']
size_y=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY']
size_z=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ']
size_c=metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC']
size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])
size_x=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeX'])
size_y=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeY'])
size_z=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeZ'])
size_c=int(metadata['ome_full_metadata']['OME']['Image']['Pixels']['@SizeC'])


#now get the first channel and first zslice tif

if is_zstack:
#prepopulate this since its the same for all tiles, and takes time to query
zstack_metadata = get_zstack_metadata('gcs://'+in_tif_pattern.format(tilex=metadata['tiles_x'][0],tiley=metadata['tiles_y'][0],prefix=metadata['prefixes'][0],channel=metadata['channels'][0]),fs=fs)
if is_tiled:

tiles=[]
for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):

zstacks=[]
for i_chan,channel in enumerate(metadata['channels']):

if is_zstack:
zstacks.append(build_zstack_from_single('gcs://'+in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel),fs=fs,zstack_metadata=zstack_metadata))
else:
zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))
size_tiles=len(metadata['tiles_x'])*len(metadata['tiles_y'])

#now get the first channel and first zslice tif


tiles=[]
for i_tile,(tilex,tiley) in enumerate(product(metadata['tiles_x'],metadata['tiles_y'])):

print(f'tile {tilex}x{tiley}, {i_tile}')
zstacks=[]
for i_chan,channel in enumerate(metadata['channels']):

print(f'channel {i_chan}')
if is_zstack:

tif_file = in_tif_pattern.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel)

pages=[]
#read each page
for i_z in range(size_z):
pages.append(da.from_delayed(delayed(read_page_as_numpy)('gcs://'+tif_file,i_z),shape=(size_y,size_x),dtype='uint16'))

zstacks.append(da.stack(pages))

#have list of zstack dask arrays for the tile, one for each channel
#stack them up and append to list of tiles
tiles.append(da.stack(zstacks))
else:
zstacks.append(build_zstack(fs.glob('gcs://'+in_tif_glob.format(tilex=tilex,tiley=tiley,prefix=metadata['prefixes'][0],channel=channel,zslice='*')),fs=fs))

#have list of zstack dask arrays for the tile, one for each channel
#stack them up and append to list of tiles
tiles.append(da.stack(zstacks))


#now we have list of tiles, each a dask array
#stack them up to get our final array
darr = da.stack(tiles)

else:
print("single tile data not supported for tif_to_zarr_gcs")

#now we have list of tiles, each a dask array
#stack them up to get our final array
darr = da.stack(tiles)

#rescale intensities, and recast
darr = darr * snakemake.params.intensity_rescaling
Expand Down

0 comments on commit a2ce218

Please sign in to comment.