Skip to content

Commit

Permalink
Auto-detect the right conversion (#268)
Browse files Browse the repository at this point in the history
* [WIP] auto detect conversion

* update formatting

* fix linting

* fix parameter list

* [WIP] implement pr feedback

* search manually for source path

* use unified parser and detect other args

* fix typing

* fix tile cubing

* only access dtype inside args namespace

* fix typing

* remove debug output

* update mypy

* add knossos conversion

* add knossos tests

* pr feedback 1

* [WIP] implement pr feedback 2

* fix tests

* resolve remaining todos

* add nifti detection

* implement pr feedback

* convert all knossos mags and write metadata

* add missing logging setup

* add missing logging setup

* fix old tests

* last tweaks
  • Loading branch information
youri-k authored Mar 18, 2021
1 parent 2ea1ddd commit 1ab2dc2
Show file tree
Hide file tree
Showing 16 changed files with 951 additions and 79 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ The tools are modular components to allow easy integration into existing pipelin

## Features

* `wkcuber`: Convert image stacks to fully ready WKW datasets (includes downsampling, compressing and metadata generation)
* `wkcuber`: Convert supported input files to fully ready WKW datasets (includes type detection, downsampling, compressing and metadata generation)
* `wkcuber.convert_image_stack_to_wkw`: Convert image stacks to fully ready WKW datasets (includes downsampling, compressing and metadata generation)
* `wkcuber.export_wkw_as_tiff`: Convert WKW datasets to a tiff stack (writing as tiles to a `z/y/x.tiff` folder structure is also supported)
* `wkcuber.cubing`: Convert image stacks (e.g., `tiff`, `jpg`, `png`, `dm3`) to WKW cubes
* `wkcuber.cubing`: Convert image stacks (e.g., `tiff`, `jpg`, `png`, `dm3`, `dm4`) to WKW cubes
* `wkcuber.tile_cubing`: Convert tiled image stacks (e.g. in `z/y/x.ext` folder structure) to WKW cubes
* `wkcuber.convert_knossos`: Convert KNOSSOS cubes to WKW cubes
* `wkcuber.convert_nifti`: Convert NIFTI files to WKW files (Currently without applying transformations).
Expand Down
18 changes: 18 additions & 0 deletions tests/scripts/auto_detection.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
set -xe

# Put the dataset back at its original location. Registered as an EXIT trap
# while the dataset is moved, so that a failing check (set -e aborts the
# script) does not leave the test data misplaced for later test scripts.
restore_dataset() {
  if [ -e testdata/superfolder/superfolder/WT1_wkw ]; then
    mv testdata/superfolder/superfolder/WT1_wkw testdata/WT1_wkw
  fi
}

# create superfolder, so we can check how the autodetection deals with nested structures
mkdir -p testdata/superfolder/superfolder

# test wkw detection
python -m wkcuber.converter \
  --scale 11.24,11.24,25 \
  testdata/WT1_wkw testoutput/autodetection/wkw | grep -q "Already a WKW dataset."

# test wkw detection in subfolder
trap restore_dataset EXIT
mv testdata/WT1_wkw testdata/superfolder/superfolder/WT1_wkw

python -m wkcuber.converter \
  --scale 11.24,11.24,25 \
  testdata/superfolder testoutput/autodetection/wkw | grep -q "Already a WKW dataset."

# success path: restore explicitly and drop the trap again
restore_dataset
trap - EXIT
2 changes: 1 addition & 1 deletion tests/scripts/simple_anisotropic_tiff_cubing.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
set -xe
mkdir -p testoutput/tiff2
python -m wkcuber \
python -m wkcuber.convert_image_stack_to_wkw \
--jobs 2 \
--batch_size 8 \
--layer_name color \
Expand Down
2 changes: 1 addition & 1 deletion tests/scripts/simple_tiff_cubing.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
set -xe
mkdir -p testoutput/tiff2
python -m wkcuber \
python -m wkcuber.convert_image_stack_to_wkw \
--jobs 2 \
--batch_size 8 \
--layer_name color \
Expand Down
2 changes: 1 addition & 1 deletion tests/scripts/simple_tiff_cubing_no_compression.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
set -xe
mkdir -p testoutput/tiff3
python -m wkcuber \
python -m wkcuber.convert_image_stack_to_wkw \
--jobs 2 \
--batch_size 8 \
--layer_name color \
Expand Down
237 changes: 237 additions & 0 deletions tests/test_auto_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
from wkcuber.converter import ImageStackConverter, KnossosConverter

# Path prefixes every detection scenario below is repeated with: relative (""),
# absolute ("/") and parent-relative ("../").
TEST_PREFIXES = ["", "/", "../"]


def test_tiff_dataset_name_and_layer_name_detection() -> None:
    """Check dataset-name and layer-name detection of ``ImageStackConverter``.

    Each scenario is a tuple of (source files relative to the prefix, expected
    dataset name, expected layer paths relative to the prefix, expected layer
    names). Every scenario is evaluated once per entry of ``TEST_PREFIXES``.
    """
    scenarios = [
        # dataset folder containing one layer folder
        (
            ["test/color/001.tif", "test/color/002.tif", "test/color/003.tif"],
            "test",
            ["test/color"],
            ["color"],
        ),
        # same layout nested inside an additional folder
        (
            [
                "superfolder/test/color/001.tif",
                "superfolder/test/color/002.tif",
                "superfolder/test/color/003.tif",
            ],
            "test",
            ["superfolder/test/color"],
            ["color"],
        ),
        # multiple layers
        (
            [
                "test/color/001.tif",
                "test/color/002.tif",
                "test/color/003.tif",
                "test/segmentation/001.tif",
                "test/segmentation/002.tif",
                "test/segmentation/003.tif",
            ],
            "test",
            ["test/color", "test/segmentation"],
            ["color", "segmentation"],
        ),
        # single folder whose name is a layer name
        (
            ["color/001.tif", "color/002.tif", "color/003.tif"],
            "dataset",
            ["color"],
            ["color"],
        ),
        # single folder whose name is the dataset name
        (
            [
                "test_dataset/001.tif",
                "test_dataset/002.tif",
                "test_dataset/003.tif",
            ],
            "test_dataset",
            ["test_dataset"],
            ["color"],
        ),
        # single file inside a folder
        (
            ["test_dataset/brain.tif"],
            "test_dataset",
            ["test_dataset/brain.tif"],
            ["brain"],
        ),
        # single file
        (["brain.tif"], "brain", ["brain.tif"], ["color"]),
        # multiple files with no parent directory: the layer path is the prefix
        (["001.tif", "002.tif"], "dataset", [""], ["color"]),
    ]

    for prefix in TEST_PREFIXES:
        for files, expected_dataset, layer_paths, layer_names in scenarios:
            stack_converter = ImageStackConverter()
            stack_converter.source_files = [prefix + rel for rel in files]
            (
                detected_dataset,
                path_to_name,
            ) = stack_converter.detect_dataset_name_and_layer_path_to_layer_name()

            assert detected_dataset == expected_dataset
            assert len(path_to_name) == len(layer_paths)
            # With the length pinned above, membership is equivalent to
            # comparing the individual entries.
            for rel_path in layer_paths:
                assert prefix + rel_path in path_to_name.keys()
            for layer_name in layer_names:
                assert layer_name in path_to_name.values()


def test_knossos_dataset_name_and_layer_path_detection() -> None:
    """Check dataset-name, layer-path and mag detection of ``KnossosConverter``.

    Each scenario is a tuple of (source files relative to the prefix, expected
    dataset name, expected layer paths relative to the prefix, expected mag set
    of every detected layer). Every scenario, and the final too-short-path
    check, is evaluated once per entry of ``TEST_PREFIXES``.
    """
    scenarios = [
        # dataset / layer / mag layout
        (
            [
                "knossos/color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
                "knossos/color/1/x0000/y0000/z0001/test_mag1_x0000_y0000_z0001.raw",
                "knossos/color/1/x0000/y0001/z0000/test_mag1_x0000_y0001_z0000.raw",
            ],
            "knossos",
            ["knossos/color"],
            {"1"},
        ),
        # same layout nested inside additional folders
        (
            [
                "superfolder/superfolder/knossos/color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
            ],
            "knossos",
            ["superfolder/superfolder/knossos/color"],
            {"1"},
        ),
        # multiple layers
        (
            [
                "knossos/color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
                "knossos/segmentation/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
            ],
            "knossos",
            ["knossos/color", "knossos/segmentation"],
            {"1"},
        ),
        # only the layer folder is given
        (
            ["color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw"],
            "dataset",
            ["color"],
            {"1"},
        ),
        # only the mag folder is given: the layer path is the prefix
        (
            ["1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw"],
            "dataset",
            [""],
            {"1"},
        ),
        # already inside the mag folder: the mag is reported as ""
        (
            ["x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw"],
            "dataset",
            [""],
            {""},
        ),
        # multiple mags of one layer
        (
            [
                "knossos/color/2/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
                "knossos/color/4/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
            ],
            "knossos",
            ["knossos/color"],
            {"2", "4"},
        ),
    ]

    for prefix in TEST_PREFIXES:
        for files, expected_dataset, expected_paths, expected_mags in scenarios:
            knossos_converter = KnossosConverter()
            knossos_converter.source_files = [prefix + rel for rel in files]
            (
                detected_dataset,
                detected_layers,
            ) = knossos_converter.detect_dataset_and_layer_paths_with_mag()

            assert detected_dataset == expected_dataset
            assert len(detected_layers) == len(expected_paths)
            # With the length pinned above, membership is equivalent to
            # comparing the individual keys.
            for rel_path in expected_paths:
                assert prefix + rel_path in detected_layers.keys()
            assert all(mags == expected_mags for mags in detected_layers.values())

        # A path that is too short must be rejected with an AssertionError.
        # Each scenario builds its own converter, so running this check after
        # the table does not affect the others.
        knossos_converter = KnossosConverter()
        knossos_converter.source_files = [
            prefix + "y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
        ]
        try:
            _, _ = knossos_converter.detect_dataset_and_layer_paths_with_mag()
        except AssertionError:
            rejected = True
        else:
            rejected = False
        assert rejected
Loading

0 comments on commit 1ab2dc2

Please sign in to comment.