read multi-page tiffs (#252)
* add tifffile dependency, deactivate python 3.6

* add imagecodecs dependency

* implement first version of multipage tif support

* fix formatting

* fix linting

* fix reading for single page tiff files

* remove unnecessary if

* fix image readers

* fix linting

* correctly read dimensions from tiff series

* update formatting

* fix linting

* fix formatting

* fix linting

* correctly read image formats

* add missing types

* correctly read all channels of data

* implement feedback

Co-authored-by: Youri K <[email protected]>
fm3 and Youri K authored Jan 22, 2021
1 parent c15bddb commit 54f7bf0
Show file tree
Hide file tree
Showing 7 changed files with 235 additions and 102 deletions.
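
In short, this change teaches the cuber to treat a single multi-page TIFF file as a whole z-stack instead of requiring one image file per slice. A minimal sketch of the tifffile behavior the new reader builds on (the file name and axis layout below are illustrative, not taken from this repository):

# Sketch: inspecting a multi-page TIFF with tifffile, as the new reader does.
# "stack.tif" is a hypothetical input file.
from tifffile import TiffFile

with TiffFile("stack.tif") as tif:
    series = tif.series[0]            # the new code asserts a single series
    print(series.axes, series.shape)  # e.g. "ZYX", (32, 512, 512)
    z_index = series.axes.find("Z")   # -1 if the file has no z axis
    num_z = series.shape[z_index] if z_index != -1 else 1
    first_page = tif.pages[0].asarray()  # one page = one slice (per channel)
    print(num_z, first_page.shape)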
22 changes: 11 additions & 11 deletions .github/workflows/main.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.7, 3.8]

     steps:
       - uses: actions/checkout@v1
@@ -25,10 +25,10 @@ jobs:
         run: |
           pip install poetry
           poetry install
       - name: Decompress test data
         run: tar -xzvf testdata/WT1_wkw.tar.gz

       - name: Check formatting
         run: poetry run black --check .
@@ -38,10 +38,10 @@ jobs:
       - name: Check typing
         run: |
           ./typecheck.sh
       - name: Python tests
         run: poetry run pytest tests

       - name: Smoke test docker
         run: |
           docker run --rm \
@@ -59,19 +59,19 @@ jobs:

       - name: Test tile cubing
         run: tests/scripts/tile_cubing.sh

       - name: Test simple tiff cubing
         run: tests/scripts/simple_tiff_cubing.sh

       - name: Test simple tiff cubing (no compression)
         run: tests/scripts/simple_tiff_cubing_no_compression.sh

       - name: Test metadata generation
         run: tests/scripts/meta_generation.sh

       - name: Test KNOSSOS conversion
         run: tests/scripts/knossos_conversion.sh

       - name: Decompress reference magnification data
         run: |
           mkdir -p testdata/tiff_mag_2_reference
@@ -98,7 +98,7 @@ jobs:
           DOCKER_PASS: ${{ secrets.DOCKER_PASS }}
         run: |
           echo $DOCKER_PASS | docker login -u $DOCKER_USER --password-stdin
       - name: Push docker images
         run: |
           docker push scalableminds/webknossos-cuber:$GITHUB_SHA
172 changes: 103 additions & 69 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
 license = "AGPL-3.0"

 [tool.poetry.dependencies]
-python = "^3.6"
+python = "^3.7"
 scipy = "^1.4.0"
 numpy = "^1.17.4"
 pillow = "^6.2.1"
@@ -19,6 +19,8 @@ psutil = "^5.6.7"
 nibabel = "^2.5.1"
 scikit-image = "^0.16.2"
 scikit-learn = "^0.24.0"
+tifffile = "^2020.11.26"
+imagecodecs = "^2020.5.30"

 [tool.poetry.dev-dependencies]
 pylint = "^2.6.0"
8 changes: 8 additions & 0 deletions tests/test_utils.py
@@ -3,10 +3,16 @@
 import wkw
 from wkcuber.mag import Mag
 import os
+from shutil import rmtree

 BLOCK_LEN = 32


+def delete_dir(relative_path):
+    if os.path.exists(relative_path) and os.path.isdir(relative_path):
+        rmtree(relative_path)
+
+
 def test_get_chunks():
     source = list(range(0, 48))
     target = list(get_chunks(source, 8))
@@ -42,6 +48,8 @@ def test_buffered_slice_writer():
     mag = Mag(1)
     dataset_path = os.path.join(dataset_dir, layer_name, mag.to_layer_name())

+    delete_dir(dataset_dir)
+
     with BufferedSliceWriter(dataset_dir, layer_name, dtype, origin, mag=mag) as writer:
         for i in range(13):
             writer.write_slice(i, test_img)
23 changes: 17 additions & 6 deletions wkcuber/cubing.py
@@ -109,9 +109,9 @@ def find_source_filenames(source_path: str) -> List[str]:
     return natsorted(source_files)


-def read_image_file(file_name: str, dtype: type) -> np.ndarray:
+def read_image_file(file_name: str, dtype: type, z_slice: int) -> np.ndarray:
     try:
-        return image_reader.read_array(file_name, dtype)
+        return image_reader.read_array(file_name, dtype, z_slice)
     except Exception as exc:
         logging.error("Reading of file={} failed with {}".format(file_name, exc))
         raise exc
@@ -176,7 +176,7 @@ def cubing_job(
         for z, file_name in zip(z_batch, source_file_batch):
             # Image shape will be (x, y, channel_count, z=1)
             image = read_image_file(
-                file_name, target_wkw_info.header.voxel_type
+                file_name, target_wkw_info.header.voxel_type, z
             )

             if not pad:
@@ -237,13 +237,19 @@ def cubing(
     batch_size: int,
     args: Namespace,
 ) -> dict:
-
     source_files = find_source_filenames(source_path)

     # All images are assumed to have equal dimensions
     num_x, num_y = image_reader.read_dimensions(source_files[0])
     num_channels = image_reader.read_channel_count(source_files[0])
-    num_z = len(source_files)
+    num_z_slices_per_file = image_reader.read_z_slices_per_file(source_files[0])
+    assert (
+        num_z_slices_per_file == 1 or len(source_files) == 1
+    ), "Multi page TIFF support only for single files"
+    if num_z_slices_per_file > 1:
+        num_z = num_z_slices_per_file
+    else:
+        num_z = len(source_files)

     target_mag = Mag(args.target_mag)
     target_wkw_info = WkwDatasetInfo(
@@ -277,14 +283,19 @@ def cubing(
         # Prepare z batches
         max_z = min(num_z + start_z, z + BLOCK_LEN)
         z_batch = list(range(z, max_z))
+        # Prepare source files array
+        if len(source_files) > 1:
+            source_files_array = source_files[z - start_z : max_z - start_z]
+        else:
+            source_files_array = source_files * (max_z - z)
         # Prepare job
         job_args.append(
             (
                 target_wkw_info,
                 z_batch,
                 target_mag,
                 interpolation_mode,
-                source_files[z - start_z : max_z - start_z],
+                source_files_array,
                 batch_size,
                 (num_x, num_y),
                 args.pad,
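
Note on the batching above: the new code keeps one invariant — either every source file contributes exactly one z slice, or there is a single multi-page file covering all slices (the assert in cubing() enforces this). In the single-file case the file list is repeated so that each z in a batch still maps to a (z, file) pair. A minimal sketch of that pairing, with made-up values:

# Sketch of the z-batch/file pairing in cubing(); the values are illustrative.
BLOCK_LEN = 32
start_z, num_z = 0, 40
source_files = ["stack.tif"]  # hypothetical single multi-page file

for z in range(start_z, start_z + num_z, BLOCK_LEN):
    max_z = min(num_z + start_z, z + BLOCK_LEN)
    z_batch = list(range(z, max_z))
    if len(source_files) > 1:
        # one file per slice: take the matching window of the sorted list
        source_files_array = source_files[z - start_z : max_z - start_z]
    else:
        # one multi-page file: repeat it so zip(z_batch, files) lines up
        source_files_array = source_files * (max_z - z)
    assert len(source_files_array) == len(z_batch)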
106 changes: 92 additions & 14 deletions wkcuber/image_readers.py
@@ -7,13 +7,30 @@

 from .vendor.dm3 import DM3
 from .vendor.dm4 import DM4File, DM4TagHeader
+from tifffile import TiffFile

 # Disable PIL's maximum image limit.
 Image.MAX_IMAGE_PIXELS = None


-class PillowImageReader:
-    def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
+class ImageReader:
+    def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
+        pass
+
+    def read_dimensions(self, file_name: str) -> Tuple[int, int]:
+        pass
+
+    def read_channel_count(self, file_name: str) -> int:
+        pass
+
+    def read_z_slices_per_file(
+        self, file_name: str  # pylint: disable=unused-argument
+    ) -> int:
+        return 1
+
+
+class PillowImageReader(ImageReader):
+    def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
         this_layer = np.array(Image.open(file_name), dtype)
         this_layer = this_layer.swapaxes(0, 1)
         this_layer = this_layer.reshape(this_layer.shape + (1,))
@@ -38,8 +55,8 @@ def to_target_datatype(data: np.ndarray, target_dtype: np.dtype) -> np.ndarray:
     return (data / factor).astype(target_dtype)


-class Dm3ImageReader:
-    def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
+class Dm3ImageReader(ImageReader):
+    def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
         dm3_file = DM3(file_name)
         this_layer = to_target_datatype(dm3_file.imagedata, dtype)
         this_layer = this_layer.swapaxes(0, 1)
@@ -55,7 +72,7 @@ def read_channel_count(self, _file_name: str) -> int:
         return 1


-class Dm4ImageReader:
+class Dm4ImageReader(ImageReader):
     def _read_tags(self, dm4file: DM4File) -> Tuple[DM4File.DM4TagDir, DM4TagHeader]:
         tags = dm4file.read_directory()
         image_data_tag = (
@@ -78,7 +95,7 @@ def _read_dimensions(
         )
         return width, height

-    def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
+    def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
         dm4file = DM4File.open(file_name)
         image_data_tag, image_tag = self._read_tags(dm4file)
         width, height = self._read_dimensions(dm4file, image_data_tag)
@@ -94,7 +111,6 @@ def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
         return data

     def read_dimensions(self, file_name: str) -> Tuple[int, int]:
-
         dm4file = DM4File.open(file_name)
         image_data_tag, _ = self._read_tags(dm4file)
         dimensions = self._read_dimensions(dm4file, image_data_tag)
@@ -107,25 +123,83 @@ def read_channel_count(self, _file_name: str) -> int:
         return 1


-class ImageReader:
+def find_count_of_axis(tif_file: TiffFile, axis: str) -> int:
+    assert len(tif_file.series) == 1, "only single tif series are supported"
+    tif_series = tif_file.series[0]
+    index = tif_series.axes.find(axis)
+    if index == -1:
+        return 1
+    else:
+        return tif_series.shape[index]  # pylint: disable=unsubscriptable-object
+
+
+class TiffImageReader(ImageReader):
+    def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
+        with TiffFile(file_name) as tif_file:
+            num_channels = self.read_channel_count(file_name)
+            if len(tif_file.pages) > num_channels:
+                data = np.array(
+                    list(
+                        map(
+                            lambda x: x.asarray(),
+                            tif_file.pages[
+                                z_slice * num_channels : z_slice * num_channels
+                                + num_channels
+                            ],
+                        )
+                    ),
+                    dtype,
+                )
+            else:
+                data = np.array(
+                    list(map(lambda x: x.asarray(), tif_file.pages[0:num_channels])),
+                    dtype,
+                )
+            # transpose data to shape(x, y, channel_count)
+            data = np.transpose(
+                data,
+                (
+                    tif_file.pages[0].axes.find("X") + 1,
+                    tif_file.pages[0].axes.find("Y") + 1,
+                    0,
+                ),
+            )
+            data = data.reshape(data.shape + (1,))
+            return data
+
+    def read_dimensions(self, file_name: str) -> Tuple[int, int]:
+        with TiffFile(file_name) as tif_file:
+            return find_count_of_axis(tif_file, "X"), find_count_of_axis(tif_file, "Y")
+
+    def read_channel_count(self, file_name: str) -> int:
+        with TiffFile(file_name) as tif_file:
+            return find_count_of_axis(tif_file, "C")
+
+    def read_z_slices_per_file(self, file_name: str) -> int:
+        with TiffFile(file_name) as tif_file:
+            return find_count_of_axis(tif_file, "Z")
+
+
+class ImageReaderManager:
     def __init__(self) -> None:
         self.readers: Dict[
-            str, Union[PillowImageReader, Dm3ImageReader, Dm4ImageReader]
+            str,
+            Union[TiffImageReader, PillowImageReader, Dm3ImageReader, Dm4ImageReader],
         ] = {
-            ".tif": PillowImageReader(),
-            ".tiff": PillowImageReader(),
+            ".tif": TiffImageReader(),
+            ".tiff": TiffImageReader(),
             ".jpg": PillowImageReader(),
             ".jpeg": PillowImageReader(),
             ".png": PillowImageReader(),
             ".dm3": Dm3ImageReader(),
             ".dm4": Dm4ImageReader(),
         }

-    def read_array(self, file_name: str, dtype: np.dtype) -> np.ndarray:
+    def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
         _, ext = path.splitext(file_name)

         # Image shape will be (x, y, channel_count, z=1) or (x, y, z=1)
-        image = self.readers[ext].read_array(file_name, dtype)
+        image = self.readers[ext].read_array(file_name, dtype, z_slice)
         # Standardize the image shape to (x, y, channel_count, z=1)
         if image.ndim == 3:
             image = image.reshape(image.shape + (1,))
@@ -140,5 +214,9 @@ def read_channel_count(self, file_name: str) -> int:
         _, ext = path.splitext(file_name)
         return self.readers[ext].read_channel_count(file_name)

+    def read_z_slices_per_file(self, file_name: str) -> int:
+        _, ext = path.splitext(file_name)
+        return self.readers[ext].read_z_slices_per_file(file_name)
+

-image_reader = ImageReader()
+image_reader = ImageReaderManager()
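
Taken together, callers now pass the z slice they want, and for multi-page TIFFs the pages belonging to that slice (one per channel) come back transposed to the standard (x, y, channel_count, z=1) shape. A hypothetical call sequence against the new manager (the file name and dtype are placeholders; the import path is assumed from the module layout):

# Sketch: exercising the new reader API; "stack.tif" is hypothetical.
import numpy as np
from wkcuber.image_readers import image_reader

num_x, num_y = image_reader.read_dimensions("stack.tif")
num_channels = image_reader.read_channel_count("stack.tif")
num_z = image_reader.read_z_slices_per_file("stack.tif")

# Read slice 3 of the stack; the result shape is (x, y, channels, z=1).
slice_3 = image_reader.read_array("stack.tif", np.uint8, 3)
assert slice_3.shape == (num_x, num_y, num_channels, 1)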
2 changes: 1 addition & 1 deletion wkcuber/tile_cubing.py
@@ -223,7 +223,7 @@ def tile_cubing_job(
             if file_name:
                 # read the image
                 image = read_image_file(
-                    file_name, target_wkw_info.header.voxel_type
+                    file_name, target_wkw_info.header.voxel_type, z
                 )
                 slices.append(image)
             else:
