diff --git a/README.md b/README.md
index a86a32e9c..5b41d0b0b 100644
--- a/README.md
+++ b/README.md
@@ -10,9 +10,10 @@ The tools are modular components to allow easy integration into existing pipelin
 ## Features
 
-* `wkcuber`: Convert image stacks to fully ready WKW datasets (includes downsampling, compressing and metadata generation)
+* `wkcuber`: Convert supported input files to fully ready WKW datasets (includes type detection, downsampling, compressing and metadata generation)
+* `wkcuber.convert_image_stack_to_wkw`: Convert image stacks to fully ready WKW datasets (includes downsampling, compressing and metadata generation)
 * `wkcuber.export_wkw_as_tiff`: Convert WKW datasets to a tiff stack (writing as tiles to a `z/y/x.tiff` folder structure is also supported)
-* `wkcuber.cubing`: Convert image stacks (e.g., `tiff`, `jpg`, `png`, `dm3`) to WKW cubes
+* `wkcuber.cubing`: Convert image stacks (e.g., `tiff`, `jpg`, `png`, `dm3`, `dm4`) to WKW cubes
 * `wkcuber.tile_cubing`: Convert tiled image stacks (e.g. in `z/y/x.ext` folder structure) to WKW cubes
 * `wkcuber.convert_knossos`: Convert KNOSSOS cubes to WKW cubes
 * `wkcuber.convert_nifti`: Convert NIFTI files to WKW files (Currently without applying transformations).
diff --git a/tests/scripts/auto_detection.sh b/tests/scripts/auto_detection.sh
new file mode 100755
index 000000000..3db007dd2
--- /dev/null
+++ b/tests/scripts/auto_detection.sh
@@ -0,0 +1,18 @@
+set -xe
+
+# create superfolder, so we can check how the autodetection deals with nested structures
+mkdir -p testdata/superfolder/superfolder
+
+# test wkw detection
+python -m wkcuber.converter \
+  --scale 11.24,11.24,25 \
+  testdata/WT1_wkw testoutput/autodetection/wkw | grep -q "Already a WKW dataset."
+
+# test wkw detection in subfolder
+mv testdata/WT1_wkw testdata/superfolder/superfolder/WT1_wkw
+
+python -m wkcuber.converter \
+  --scale 11.24,11.24,25 \
+  testdata/superfolder testoutput/autodetection/wkw | grep -q "Already a WKW dataset."
+
+mv testdata/superfolder/superfolder/WT1_wkw testdata/WT1_wkw
\ No newline at end of file
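For orientation, this is what the shell test above exercises, driven from Python instead of the CLI. A minimal sketch, not part of the diff, assuming only the attributes that the converter code added below in `wkcuber/converter.py` actually reads:

```python
# Sketch: driving the auto-detection entry point programmatically. For a WKW
# input, WkwConverter matches, logs "Already a WKW dataset. No conversion
# necessary..." and skips conversion (the grep in the test checks for that log).
from argparse import Namespace

from wkcuber.converter import main as auto_detect_and_convert

args = Namespace(
    source_path="testdata/WT1_wkw",
    target_path="testoutput/autodetection/wkw",
    scale=(11.24, 11.24, 25),
)
auto_detect_and_convert(args)
```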
diff --git a/tests/scripts/simple_anisotropic_tiff_cubing.sh b/tests/scripts/simple_anisotropic_tiff_cubing.sh
index 84449d8a9..279769ec0 100755
--- a/tests/scripts/simple_anisotropic_tiff_cubing.sh
+++ b/tests/scripts/simple_anisotropic_tiff_cubing.sh
@@ -1,6 +1,6 @@
 set -xe
 mkdir -p testoutput/tiff2
-python -m wkcuber \
+python -m wkcuber.convert_image_stack_to_wkw \
   --jobs 2 \
   --batch_size 8 \
   --layer_name color \
diff --git a/tests/scripts/simple_tiff_cubing.sh b/tests/scripts/simple_tiff_cubing.sh
index 2d5d5fa2c..4354e128c 100755
--- a/tests/scripts/simple_tiff_cubing.sh
+++ b/tests/scripts/simple_tiff_cubing.sh
@@ -1,6 +1,6 @@
 set -xe
 mkdir -p testoutput/tiff2
-python -m wkcuber \
+python -m wkcuber.convert_image_stack_to_wkw \
   --jobs 2 \
   --batch_size 8 \
   --layer_name color \
diff --git a/tests/scripts/simple_tiff_cubing_no_compression.sh b/tests/scripts/simple_tiff_cubing_no_compression.sh
index ba519079c..42bbcdfd7 100755
--- a/tests/scripts/simple_tiff_cubing_no_compression.sh
+++ b/tests/scripts/simple_tiff_cubing_no_compression.sh
@@ -1,6 +1,6 @@
 set -xe
 mkdir -p testoutput/tiff3
-python -m wkcuber \
+python -m wkcuber.convert_image_stack_to_wkw \
   --jobs 2 \
   --batch_size 8 \
   --layer_name color \
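The renamed scripts pin the explicit image-stack pipeline to its new module name; `python -m wkcuber` itself now auto-detects the input type. The test file below fixes the naming heuristic of `ImageStackConverter`; as a compact orientation, the rules can be restated as a toy function (illustrative only; the real implementation follows in `wkcuber/converter.py`, and this sketch ignores the `/` and `../` prefix handling that the tests also cover):

```python
# Toy restatement of the naming rules the tests below verify:
#   - deep paths:      .../dataset_name/layer_name/file.ext
#   - two components:  folder is the layer name if it is a well-known one
#                      ("color", "segmentation", "mask"), else the dataset name
#   - bare files:      dataset name falls back to the file stem or "dataset"
from os import path
from typing import List, Tuple


def guess_names(source_files: List[str]) -> Tuple[str, str]:
    parts = [p for p in source_files[0].split("/") if p not in ("", "..")]
    single = len(source_files) == 1
    if len(parts) == 1:
        return (path.splitext(parts[0])[0] if single else "dataset"), "color"
    if len(parts) == 2:
        if parts[0] in ("color", "segmentation", "mask"):
            return "dataset", parts[0]
        return parts[0], (path.splitext(parts[1])[0] if single else "color")
    return parts[-3], parts[-2]


print(guess_names(["test/color/001.tif"]))      # ('test', 'color')
print(guess_names(["test_dataset/brain.tif"]))  # ('test_dataset', 'brain')
print(guess_names(["brain.tif"]))               # ('brain', 'color')
```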
diff --git a/tests/test_auto_detection.py b/tests/test_auto_detection.py
new file mode 100644
index 000000000..ef4ac419c
--- /dev/null
+++ b/tests/test_auto_detection.py
@@ -0,0 +1,237 @@
+from wkcuber.converter import ImageStackConverter, KnossosConverter
+
+TEST_PREFIXES = ["", "/", "../"]
+
+
+def test_tiff_dataset_name_and_layer_name_detection() -> None:
+    for prefix in TEST_PREFIXES:
+
+        # test if ds name and layer name are correctly detected
+        converter = ImageStackConverter()
+        converter.source_files = [
+            prefix + "test/color/001.tif",
+            prefix + "test/color/002.tif",
+            prefix + "test/color/003.tif",
+        ]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "test"
+        assert len(layer_path_to_layer_name) == 1
+        assert list(layer_path_to_layer_name)[0] == prefix + "test/color"
+        assert list(layer_path_to_layer_name.values())[0] == "color"
+
+        # test if in subfolder
+        converter = ImageStackConverter()
+        converter.source_files = [
+            prefix + "superfolder/test/color/001.tif",
+            prefix + "superfolder/test/color/002.tif",
+            prefix + "superfolder/test/color/003.tif",
+        ]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "test"
+        assert len(layer_path_to_layer_name) == 1
+        assert list(layer_path_to_layer_name)[0] == prefix + "superfolder/test/color"
+        assert list(layer_path_to_layer_name.values())[0] == "color"
+
+        # test for multiple layers
+        converter = ImageStackConverter()
+        converter.source_files = [
+            prefix + "test/color/001.tif",
+            prefix + "test/color/002.tif",
+            prefix + "test/color/003.tif",
+            prefix + "test/segmentation/001.tif",
+            prefix + "test/segmentation/002.tif",
+            prefix + "test/segmentation/003.tif",
+        ]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "test"
+        assert len(layer_path_to_layer_name) == 2
+        assert prefix + "test/color" in layer_path_to_layer_name.keys()
+        assert prefix + "test/segmentation" in layer_path_to_layer_name.keys()
+        assert "color" in layer_path_to_layer_name.values()
+        assert "segmentation" in layer_path_to_layer_name.values()
+
+        # test if in single folder and folder name is layer name
+        converter = ImageStackConverter()
+        converter.source_files = [
+            prefix + "color/001.tif",
+            prefix + "color/002.tif",
+            prefix + "color/003.tif",
+        ]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "dataset"
+        assert len(layer_path_to_layer_name) == 1
+        assert list(layer_path_to_layer_name)[0] == prefix + "color"
+        assert list(layer_path_to_layer_name.values())[0] == "color"
+
+        # test if in single folder and folder name is ds name
+        converter = ImageStackConverter()
+        converter.source_files = [
+            prefix + "test_dataset/001.tif",
+            prefix + "test_dataset/002.tif",
+            prefix + "test_dataset/003.tif",
+        ]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "test_dataset"
+        assert len(layer_path_to_layer_name) == 1
+        assert list(layer_path_to_layer_name)[0] == prefix + "test_dataset"
+        assert list(layer_path_to_layer_name.values())[0] == "color"
+
+        # test if single file in folder
+        converter = ImageStackConverter()
+        converter.source_files = [prefix + "test_dataset/brain.tif"]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "test_dataset"
+        assert len(layer_path_to_layer_name) == 1
+        assert list(layer_path_to_layer_name)[0] == prefix + "test_dataset/brain.tif"
+        assert list(layer_path_to_layer_name.values())[0] == "brain"
+
+        # test if single file
+        converter = ImageStackConverter()
+        converter.source_files = [prefix + "brain.tif"]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "brain"
+        assert len(layer_path_to_layer_name) == 1
+        assert list(layer_path_to_layer_name)[0] == prefix + "brain.tif"
+        assert list(layer_path_to_layer_name.values())[0] == "color"
+
+        # test for multiple files with no parent directory
+        converter = ImageStackConverter()
+        converter.source_files = [prefix + "001.tif", prefix + "002.tif"]
+        (
+            dataset_name,
+            layer_path_to_layer_name,
+        ) = converter.detect_dataset_name_and_layer_path_to_layer_name()
+        assert dataset_name == "dataset"
+        assert len(layer_path_to_layer_name) == 1
+        assert list(layer_path_to_layer_name.keys())[0] == prefix
+        assert list(layer_path_to_layer_name.values())[0] == "color"
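The KNOSSOS cases that follow hinge on one fixed path anatomy, and counting components from the right is what makes detection work at any nesting depth. A minimal illustration (sample path taken from the tests below):

```python
# KNOSSOS path anatomy assumed by the tests below (deepest case):
#   (.../)dataset_name/layer_name/mag/x0000/y0000/z0000/file.raw
sample = "knossos/color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw"
parts = sample.split("/")
dataset_name, layer_name, mag = parts[-7], parts[-6], parts[-5]
assert (dataset_name, layer_name, mag) == ("knossos", "color", "1")
```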
+
+
+def test_knossos_dataset_name_and_layer_path_detection() -> None:
+    for prefix in TEST_PREFIXES:
+
+        # test if dataset name and layer name and mag are correct
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix
+            + "knossos/color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+            prefix
+            + "knossos/color/1/x0000/y0000/z0001/test_mag1_x0000_y0000_z0001.raw",
+            prefix
+            + "knossos/color/1/x0000/y0001/z0000/test_mag1_x0000_y0001_z0000.raw",
+        ]
+        dataset_name, layer_paths = converter.detect_dataset_and_layer_paths_with_mag()
+        assert dataset_name == "knossos"
+        assert len(layer_paths) == 1
+        assert list(layer_paths.keys())[0] == prefix + "knossos/color"
+        assert list(layer_paths.values())[0] == {"1"}
+
+        # test if in subfolder
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix
+            + "superfolder/superfolder/knossos/color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+        ]
+        dataset_name, layer_paths = converter.detect_dataset_and_layer_paths_with_mag()
+        assert dataset_name == "knossos"
+        assert len(layer_paths) == 1
+        assert (
+            list(layer_paths.keys())[0]
+            == prefix + "superfolder/superfolder/knossos/color"
+        )
+        assert list(layer_paths.values())[0] == {"1"}
+
+        # test for multiple layers
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix
+            + "knossos/color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+            prefix
+            + "knossos/segmentation/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+        ]
+        dataset_name, layer_paths = converter.detect_dataset_and_layer_paths_with_mag()
+        assert dataset_name == "knossos"
+        assert len(layer_paths) == 2
+        assert prefix + "knossos/color" in layer_paths.keys()
+        assert prefix + "knossos/segmentation" in layer_paths.keys()
+        assert all(map(lambda m: m == {"1"}, layer_paths.values()))
+
+        # test if only layer folder given
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix + "color/1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+        ]
+        dataset_name, layer_paths = converter.detect_dataset_and_layer_paths_with_mag()
+        assert dataset_name == "dataset"
+        assert len(layer_paths) == 1
+        assert list(layer_paths.keys())[0] == prefix + "color"
+        assert list(layer_paths.values())[0] == {"1"}
+
+        # test if only mag folder given
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix + "1/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+        ]
+        dataset_name, layer_paths = converter.detect_dataset_and_layer_paths_with_mag()
+        assert dataset_name == "dataset"
+        assert len(layer_paths) == 1
+        assert list(layer_paths.keys())[0] == prefix
+        assert list(layer_paths.values())[0] == {"1"}
+
+        # test if already in mag folder
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix + "x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+        ]
+        dataset_name, layer_paths = converter.detect_dataset_and_layer_paths_with_mag()
+        assert dataset_name == "dataset"
+        assert len(layer_paths) == 1
+        assert list(layer_paths.keys())[0] == prefix
+        assert list(layer_paths.values())[0] == {""}
+
+        # test if too short path gets detected
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix + "y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+        ]
+        assertion_error = False
+        try:
+            _, _ = converter.detect_dataset_and_layer_paths_with_mag()
+        except AssertionError:
+            assertion_error = True
+        assert assertion_error
+
+        # test for multiple mags
+        converter = KnossosConverter()
+        converter.source_files = [
+            prefix
+            + "knossos/color/2/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+            prefix
+            + "knossos/color/4/x0000/y0000/z0000/test_mag1_x0000_y0000_z0000.raw",
+        ]
+        dataset_name, layer_paths = converter.detect_dataset_and_layer_paths_with_mag()
+        assert dataset_name == "knossos"
+        assert len(layer_paths) == 1
+        assert list(layer_paths.keys())[0] == prefix + "knossos/color"
+        assert list(layer_paths.values())[0] == {"2", "4"}
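Assuming the suite runs under pytest, which the `tests/` layout suggests, the manual try/except in the "too short path" case above has an equivalent, more idiomatic phrasing (shown as a fragment that reuses the test's `converter` variable; not part of the diff):

```python
import pytest

# same assertion as the flag-based try/except in the test above
with pytest.raises(AssertionError):
    converter.detect_dataset_and_layer_paths_with_mag()
```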
diff --git a/wkcuber/__main__.py b/wkcuber/__main__.py
index 2f4bbb3f9..6c6fe07de 100644
--- a/wkcuber/__main__.py
+++ b/wkcuber/__main__.py
@@ -1,14 +1,37 @@
-from .cubing import cubing, create_parser as create_cubing_parser
+from typing import List, Dict
+
 from .downsampling import downsample_mags_isotropic, downsample_mags_anisotropic
 from .compress import compress_mag_inplace
-from .metadata import write_webknossos_metadata, refresh_metadata
-from .utils import add_isotropic_flag, setup_logging, add_scale_flag
+from .metadata import refresh_metadata
+from .utils import add_isotropic_flag, setup_logging
 from .mag import Mag
+from .converter import (
+    create_parser as create_conversion_parser,
+    main as auto_detect_and_run_conversion,
+)
 from argparse import Namespace, ArgumentParser
+from pathlib import Path
+
+
+def detect_present_mags(target_path: str) -> Dict[Path, List[Mag]]:
+    layer_path_to_mags: Dict[Path, List[Mag]] = dict()
+    dataset_path = Path(target_path)
+    layer_paths = [p for p in dataset_path.iterdir() if p.is_dir()]
+    for layer_p in layer_paths:
+        layer_path_to_mags.setdefault(layer_p, list())
+        mag_paths = [p for p in layer_p.iterdir() if p.is_dir()]
+        for mag_p in mag_paths:
+            try:
+                mag = Mag(mag_p.stem)
+            except (AssertionError, ValueError):
+                continue
+            layer_path_to_mags[layer_p].append(mag)
+
+    return layer_path_to_mags
 
 
 def create_parser() -> ArgumentParser:
-    parser = create_cubing_parser()
+    parser = create_conversion_parser()
 
     parser.add_argument(
         "--max_mag",
@@ -26,8 +49,6 @@ def create_parser() -> ArgumentParser:
     )
 
     parser.add_argument("--name", "-n", help="Name of the dataset", default=None)
-
-    add_scale_flag(parser)
     add_isotropic_flag(parser)
 
     return parser
@@ -36,52 +57,49 @@ def main(args: Namespace) -> None:
     setup_logging(args)
 
-    bounding_box = cubing(
-        args.source_path,
-        args.target_path,
-        args.layer_name,
-        args.dtype,
-        args.batch_size,
-        args,
-    )
+    auto_detect_and_run_conversion(args)
 
-    write_webknossos_metadata(
-        args.target_path,
-        args.name,
-        args.scale,
-        compute_max_id=False,
-        exact_bounding_box=bounding_box,
-    )
+    layer_path_to_mags: Dict[Path, List[Mag]] = detect_present_mags(args.target_path)
 
     if not args.no_compress:
-        compress_mag_inplace(args.target_path, args.layer_name, Mag(1), args)
+        for (layer_path, mags) in layer_path_to_mags.items():
+            layer_name = layer_path.stem
+            for mag in mags:
+                compress_mag_inplace(args.target_path, layer_name, mag, args)
 
     if not args.isotropic:
-        downsample_mags_anisotropic(
-            args.target_path,
-            args.layer_name,
-            Mag(1),
-            Mag(args.max_mag),
-            args.scale,
-            "default",
-            not args.no_compress,
-            args=args,
-        )
+        for (layer_path, mags) in layer_path_to_mags.items():
+            layer_name = layer_path.stem
+            mags.sort()
+            downsample_mags_anisotropic(
+                args.target_path,
+                layer_name,
+                mags[-1],
+                Mag(args.max_mag),
+                args.scale,
+                "default",
+                not args.no_compress,
+                args=args,
+            )
 
     else:
-        downsample_mags_isotropic(
-            args.target_path,
-            args.layer_name,
-            Mag(1),
-            Mag(args.max_mag),
-            "default",
-            not args.no_compress,
-            args=args,
-        )
+        for (layer_path, mags) in layer_path_to_mags.items():
+            layer_name = layer_path.stem
+            mags.sort()
+            downsample_mags_isotropic(
+                args.target_path,
+                layer_name,
+                mags[-1],
+                Mag(args.max_mag),
+                "default",
+                not args.no_compress,
+                args=args,
+            )
 
     refresh_metadata(args.target_path)
 
 
 if __name__ == "__main__":
-    parsed_args: Namespace = create_parser().parse_args()
-    main(parsed_args)
+    args = create_parser().parse_args()
+    setup_logging(args)
+    main(args)
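A sketch of what the new `detect_present_mags` helper yields for a hypothetical, already-converted dataset with one color layer in mags 1 and 2; directories that do not parse as a mag are skipped by the `Mag()` try/except above:

```python
# Assumed layout on disk (paths are hypothetical):
#   testoutput/tiff2/color/1/...
#   testoutput/tiff2/color/2/...
from wkcuber.__main__ import detect_present_mags

mags_by_layer = detect_present_mags("testoutput/tiff2")
for layer_path, mags in mags_by_layer.items():
    print(layer_path.stem, sorted(mags))  # e.g.: color [Mag(1), Mag(2)]
```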
diff --git a/wkcuber/convert_image_stack_to_wkw.py b/wkcuber/convert_image_stack_to_wkw.py
new file mode 100644
index 000000000..85880eb4b
--- /dev/null
+++ b/wkcuber/convert_image_stack_to_wkw.py
@@ -0,0 +1,86 @@
+from .cubing import cubing, create_parser as create_cubing_parser
+from .downsampling import downsample_mags_isotropic, downsample_mags_anisotropic
+from .compress import compress_mag_inplace
+from .metadata import write_webknossos_metadata, refresh_metadata
+from .utils import add_isotropic_flag, setup_logging, add_scale_flag
+from .mag import Mag
+from argparse import Namespace, ArgumentParser
+
+
+def create_parser() -> ArgumentParser:
+    parser = create_cubing_parser()
+
+    parser.add_argument(
+        "--max_mag",
+        "-m",
+        help="Max resolution to be downsampled. Needs to be a power of 2.",
+        type=int,
+        default=32,
+    )
+
+    parser.add_argument(
+        "--no_compress",
+        help="Don't compress this data",
+        default=False,
+        action="store_true",
+    )
+
+    parser.add_argument("--name", "-n", help="Name of the dataset", default=None)
+    add_scale_flag(parser)
+    add_isotropic_flag(parser)
+
+    return parser
+
+
+def main(args: Namespace) -> None:
+    setup_logging(args)
+
+    bounding_box = cubing(
+        args.source_path,
+        args.target_path,
+        args.layer_name,
+        args.batch_size if "batch_size" in args else None,
+        args,
+    )
+
+    write_webknossos_metadata(
+        args.target_path,
+        args.name,
+        args.scale,
+        compute_max_id=False,
+        exact_bounding_box=bounding_box,
+    )
+
+    if not args.no_compress:
+        compress_mag_inplace(args.target_path, args.layer_name, Mag(1), args)
+
+    if not args.isotropic:
+        downsample_mags_anisotropic(
+            args.target_path,
+            args.layer_name,
+            Mag(1),
+            Mag(args.max_mag),
+            args.scale,
+            "default",
+            not args.no_compress,
+            args=args,
+        )
+
+    else:
+        downsample_mags_isotropic(
+            args.target_path,
+            args.layer_name,
+            Mag(1),
+            Mag(args.max_mag),
+            "default",
+            not args.no_compress,
+            args=args,
+        )
+
+    refresh_metadata(args.target_path)
+
+
+if __name__ == "__main__":
+    args = create_parser().parse_args()
+    setup_logging(args)
+    main(args)
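The split-out module keeps the old `python -m wkcuber` behaviour: cube, write metadata, compress, downsample. A usage sketch mirroring `tests/scripts/simple_tiff_cubing.sh`; the positional source/target arguments come from the cubing parser, and the concrete values here are illustrative:

```python
from wkcuber.convert_image_stack_to_wkw import create_parser, main

args = create_parser().parse_args(
    [
        "--jobs", "2",
        "--batch_size", "8",
        "--layer_name", "color",
        "--name", "awesome_data",          # dataset name, value made up
        "--scale", "11.24,11.24,25",
        "testdata/tiff",                   # source image stack (hypothetical)
        "testoutput/tiff2",                # target WKW dataset
    ]
)
main(args)
```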
" "When converting a folder, this option is ignored", default=False, @@ -102,7 +102,6 @@ def create_parser() -> ArgumentParser: def to_target_datatype( data: np.ndarray, target_dtype: str, is_segmentation_layer: bool ) -> np.ndarray: - if is_segmentation_layer: original_shape = data.shape label_encoder = LabelEncoder() @@ -321,10 +320,7 @@ def convert_folder_nifti( ) -def main() -> None: - args = create_parser().parse_args() - setup_logging(args) - +def main(args: Namespace) -> None: source_path = Path(args.source_path) flip_axes = None @@ -363,4 +359,7 @@ def main() -> None: if __name__ == "__main__": - main() + args = create_parser().parse_args() + setup_logging(args) + + main(args) diff --git a/wkcuber/converter.py b/wkcuber/converter.py new file mode 100644 index 000000000..92ecb58af --- /dev/null +++ b/wkcuber/converter.py @@ -0,0 +1,479 @@ +from argparse import ArgumentParser, Namespace +from os import path, sep +from pathlib import Path +from typing import Iterable, List, Any, Tuple, Dict, Set, Callable, cast + +from .cubing import ( + cubing as cube_image_stack, + create_parser as create_image_stack_parser, +) +from .convert_knossos import ( + main as convert_knossos, + create_parser as create_knossos_parser, +) +from .convert_nifti import main as convert_nifti, create_parser as create_nifti_parser +from .image_readers import image_reader +from .utils import find_files, add_scale_flag, logger, add_verbose_flag, setup_logging +from .metadata import write_webknossos_metadata + + +def create_parser() -> ArgumentParser: + parser = ArgumentParser( + epilog="If you want to pass more specific config values, please use the individual converters. See the readme for a complete overview." + ) + + parser.add_argument( + "source_path", help="Input file or directory containing the input files." + ) + + parser.add_argument( + "target_path", help="Output directory for the generated dataset." 
diff --git a/wkcuber/converter.py b/wkcuber/converter.py
new file mode 100644
index 000000000..92ecb58af
--- /dev/null
+++ b/wkcuber/converter.py
@@ -0,0 +1,479 @@
+from argparse import ArgumentParser, Namespace
+from os import path, sep
+from pathlib import Path
+from typing import Iterable, List, Any, Tuple, Dict, Set, Callable, cast
+
+from .cubing import (
+    cubing as cube_image_stack,
+    create_parser as create_image_stack_parser,
+)
+from .convert_knossos import (
+    main as convert_knossos,
+    create_parser as create_knossos_parser,
+)
+from .convert_nifti import main as convert_nifti, create_parser as create_nifti_parser
+from .image_readers import image_reader
+from .utils import find_files, add_scale_flag, logger, add_verbose_flag, setup_logging
+from .metadata import write_webknossos_metadata
+
+
+def create_parser() -> ArgumentParser:
+    parser = ArgumentParser(
+        epilog="If you want to pass more specific config values, please use the individual converters. See the readme for a complete overview."
+    )
+
+    parser.add_argument(
+        "source_path", help="Input file or directory containing the input files."
+    )
+
+    parser.add_argument(
+        "target_path", help="Output directory for the generated dataset."
+    )
+
+    add_scale_flag(parser)
+    add_verbose_flag(parser)
+
+    return parser
+
+
+def put_default_if_not_present(args: Namespace, name: str, default: Any) -> None:
+    dictionary = vars(args)
+    if name not in dictionary or dictionary[name] is None:
+        dictionary[name] = default
+
+
+def put_default_from_argparser_if_not_present(
+    args: Namespace, argparser: ArgumentParser, name: str
+) -> None:
+    put_default_if_not_present(args, name, argparser.get_default(name))
+
+
+def add_to_set_in_dictionary(dictionary: dict, key: str, value: Any) -> None:
+    dictionary.setdefault(key, set())
+    dictionary[key].add(value)
+
+
+def get_source_files(
+    input_path: str, extensions: Iterable[str], allows_single_file_input: bool
+) -> List[str]:
+    if Path(input_path).is_dir():
+        input_path = path.join(input_path, "**")
+    elif not allows_single_file_input:
+        return []
+
+    source_files = list(find_files(input_path, extensions))
+
+    return source_files
+
+
+class Converter:
+    def __init__(self) -> None:
+        self.source_files: List[str] = []
+        self.prefix: str = ""
+
+    def accepts_input(self, source_path: str) -> bool:
+        raise NotImplementedError()
+
+    def convert_input(self, args: Namespace) -> bool:
+        # returns True if metadata should be written after the conversion
+        raise NotImplementedError()
+
+    def check_path_length_and_set_prefix(self) -> int:
+        first_split_path = self.source_files[0].split(sep)
+        traversal_depth = len(first_split_path)
+
+        self.prefix = ""
+        if first_split_path[0] == "":
+            self.prefix = "/"
+        elif first_split_path[0] == "..":
+            self.prefix = "../"
+
+        assert all(
+            map(
+                lambda p: len(p.split(sep)) == traversal_depth,
+                self.source_files,
+            )
+        ), "Cannot detect correct layer format. Please check the input directory."
+
+        return traversal_depth
+
+    def apply_handle_function(
+        self, handle_function: Callable, starts_with_prefix: bool
+    ) -> None:
+        for f in self.source_files:
+            split_path = f.split(sep)
+            if starts_with_prefix:
+                split_path = split_path[1:]
+
+            handle_function(split_path)
+
+
+class WkwConverter(Converter):
+    def accepts_input(self, source_path: str) -> bool:
+        source_files = get_source_files(source_path, {".wkw"}, False)
+        return len(source_files) > 0
+
+    def convert_input(self, args: Namespace) -> bool:
+        logger.info("Already a WKW dataset. No conversion necessary...")
+        return False
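How the `put_default_*` helpers above are meant to be used by the concrete converters that follow: a `Namespace` assembled by the minimal top-level parser is backfilled with defaults borrowed from a specialised parser. A self-contained sketch; the parser and flag here are made up:

```python
from argparse import ArgumentParser, Namespace

from wkcuber.converter import put_default_from_argparser_if_not_present

specialised_parser = ArgumentParser()
specialised_parser.add_argument("--jobs", type=int, default=4)

args = Namespace()  # e.g. produced by the minimal top-level parser
put_default_from_argparser_if_not_present(args, specialised_parser, "jobs")
assert args.jobs == 4  # borrowed from the specialised parser's default
```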
+
+
+class NiftiConverter(Converter):
+    def accepts_input(self, source_path: str) -> bool:
+        source_files = get_source_files(source_path, {".nii"}, True)
+        self.source_files = source_files
+        return len(source_files) > 0
+
+    def convert_input(self, args: Namespace) -> bool:
+        logger.info("Converting Nifti dataset")
+
+        # add missing config attributes with defaults
+        nifti_parser = create_nifti_parser()
+        if not hasattr(args, "dtype"):
+            logger.info("Assumed data type is uint8")
+        put_default_from_argparser_if_not_present(args, nifti_parser, "write_tiff")
+        put_default_from_argparser_if_not_present(args, nifti_parser, "dtype")
+        put_default_from_argparser_if_not_present(
+            args, nifti_parser, "use_orientation_header"
+        )
+        put_default_from_argparser_if_not_present(
+            args, nifti_parser, "enforce_bounding_box"
+        )
+        put_default_from_argparser_if_not_present(args, nifti_parser, "flip_axes")
+        put_default_from_argparser_if_not_present(args, nifti_parser, "verbose")
+
+        if len(self.source_files) == 1:
+            source_file = self.source_files[0]
+            layer_name = path.splitext(path.basename(source_file))[0]
+            put_default_if_not_present(
+                args, "is_segmentation_layer", layer_name == "segmentation"
+            )
+            args.layer_name = layer_name
+            args.source_path = source_file
+        else:
+            # We do not explicitly set the "color_file" option since we could not guess it any better than the internal algorithm
+            for p in self.source_files:
+                layer_name = path.splitext(path.basename(p))[0]
+                if layer_name == "segmentation":
+                    put_default_if_not_present(
+                        args, "segmentation_file", path.relpath(p, args.source_path)
+                    )
+                    break
+
+        convert_nifti(args)
+
+        return False
+
+
+class KnossosConverter(Converter):
+    def __init__(self) -> None:
+        super().__init__()
+        self.layer_path_to_mag_set: Dict[str, set] = dict()
+        self.dataset_names: Set[str] = set()
+        self.prefix: str = ""
+
+    def accepts_input(self, source_path: str) -> bool:
+        source_files = get_source_files(source_path, {".raw"}, False)
+        self.source_files = list(
+            map(lambda p: cast(str, path.normpath(p)), source_files)
+        )
+
+        return len(self.source_files) > 0
+
+    def convert_input(self, args: Namespace) -> bool:
+        logger.info("Converting KNOSSOS dataset")
+
+        # add missing config attributes with defaults
+        knossos_parser = create_knossos_parser()
+        put_default_from_argparser_if_not_present(args, knossos_parser, "verbose")
+        put_default_from_argparser_if_not_present(args, knossos_parser, "jobs")
+        put_default_from_argparser_if_not_present(
+            args, knossos_parser, "distribution_strategy"
+        )
+
+        if not hasattr(args, "dtype"):
+            logger.info("Assumed data type is uint8")
+
+        put_default_from_argparser_if_not_present(args, knossos_parser, "dtype")
+
+        (
+            dataset_name,
+            layer_paths_and_mags,
+        ) = self.detect_dataset_and_layer_paths_with_mag()
+        put_default_if_not_present(args, "name", dataset_name)
+
+        for layer_path, mags in layer_paths_and_mags.items():
+            for mag in mags:
+                # if the mag path is empty, we are already inside the mag folder,
+                # so there is only one mag. We guess that this is mag 1.
+                if mag != "":
+                    try:
+                        mag_int = int(mag)
+                    except ValueError:
+                        continue
+                else:
+                    mag_int = 1
+                args.mag = mag_int
+                args.source_path = path.join(layer_path, mag)
+                args.layer_name = (
+                    "color"
+                    if path.basename(layer_path) == ""
+                    else path.basename(layer_path)
+                )
+                convert_knossos(args)
+
+        return True
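A worked example of the loop above, assuming detection produced one color layer with the mags `{"1", "2"}` (values hypothetical): every mag becomes one `convert_knossos` call, and an empty mag string, meaning the input already pointed into a mag folder, is taken to be mag 1:

```python
from os import path

layer_paths_and_mags = {"knossos/color": {"1", "2"}}  # hypothetical detection result
for layer_path, mags in layer_paths_and_mags.items():
    for mag in sorted(mags):
        mag_int = int(mag) if mag != "" else 1
        print(mag_int, path.join(layer_path, mag), path.basename(layer_path))
# 1 knossos/color/1 color
# 2 knossos/color/2 color
```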
+
+    def detect_dataset_and_layer_paths_with_mag(
+        self,
+    ) -> Tuple[str, Dict[str, Set[str]]]:
+        # Path structure for knossos is .../(dataset_name)/(layer_name)/(mag)folder/x0000/y0000/z0000/filename.raw
+        traversal_depth = self.check_path_length_and_set_prefix()
+        starts_with_prefix = self.prefix != ""
+
+        assert (
+            traversal_depth >= 4 if not starts_with_prefix else traversal_depth >= 5
+        ), "Input Format is unreadable. Make sure to pass the path which points at least to a KNOSSOS magnification (e.g., testdata/knossos/color/1)."
+
+        if starts_with_prefix:
+            traversal_depth = traversal_depth - 1
+
+        if traversal_depth == 4:
+            self.apply_handle_function(self.handle_path_length_4, starts_with_prefix)
+        elif traversal_depth == 5:
+            self.apply_handle_function(self.handle_path_length_5, starts_with_prefix)
+        elif traversal_depth == 6:
+            self.apply_handle_function(self.handle_path_length_6, starts_with_prefix)
+        else:
+            self.apply_handle_function(
+                self.handle_path_length_longer, starts_with_prefix
+            )
+
+        assert (
+            len(self.dataset_names) == 1
+        ), "More than one dataset found. Stopping conversion..."
+        assert (
+            len(self.layer_path_to_mag_set) > 0
+        ), "No layers found. Stopping conversion..."
+
+        return self.dataset_names.pop(), self.layer_path_to_mag_set
+
+    def handle_path_length_4(
+        self,
+        split_path: List[str],  # pylint: disable=unused-argument
+    ) -> None:
+        # already inside the mag folder => (/)x0000/y0000/z0000/filename.raw
+        add_to_set_in_dictionary(self.layer_path_to_mag_set, self.prefix, "")
+        self.dataset_names.add("dataset")
+
+    def handle_path_length_5(
+        self,
+        split_path: List[str],
+    ) -> None:
+        # only the mag folder is given, therefore the layer path is empty => (/)mag/x0000/y0000/z0000/filename.raw
+        add_to_set_in_dictionary(self.layer_path_to_mag_set, self.prefix, split_path[0])
+        self.dataset_names.add("dataset")
+
+    def handle_path_length_6(
+        self,
+        split_path: List[str],
+    ) -> None:
+        # additionally the layer folder is given, that should indicate a single layer as well => (/)layer/mag/x0000/y0000/z0000/filename.raw
+        add_to_set_in_dictionary(
+            self.layer_path_to_mag_set, self.prefix + split_path[0], split_path[1]
+        )
+        self.dataset_names.add("dataset")
+
+    def handle_path_length_longer(
+        self,
+        split_path: List[str],
+    ) -> None:
+        # also a dataset folder is given => (/../)dataset_name/layer/mag/x0000/y0000/z0000/filename.raw
+        layer_path = self.prefix + sep.join(split_path[0:-5])
+        add_to_set_in_dictionary(self.layer_path_to_mag_set, layer_path, split_path[-5])
+        self.dataset_names.add(split_path[-7])
+
+
+class ImageStackConverter(Converter):
+    def __init__(self) -> None:
+        super().__init__()
+        self.args: Namespace = Namespace()
+        self.layer_path_to_layer_name: Dict[str, str] = dict()
+        self.dataset_names: Set[str] = set()
+
+    def accepts_input(self, source_path: str) -> bool:
+        source_files = get_source_files(source_path, image_reader.readers.keys(), True)
+
+        if len(source_files) == 0:
+            return False
+
+        _, ext = path.splitext(source_files[0])
+
+        assert all(
+            map(lambda p: path.splitext(p)[1] == ext, source_files)
+        ), "Not all image files are of the same type"
+
+        self.source_files = list(
+            map(lambda p: cast(str, path.normpath(p)), source_files)
+        )
+
+        return True
+
+    def convert_input(self, args: Namespace) -> bool:
+        logger.info("Converting image stack")
+
+        # add missing config attributes with defaults
+        image_stack_parser = create_image_stack_parser()
+        put_default_from_argparser_if_not_present(
+            args, image_stack_parser, "target_mag"
+        )
+        put_default_from_argparser_if_not_present(
+            args, image_stack_parser, "wkw_file_len"
+        )
+        put_default_from_argparser_if_not_present(
+            args, image_stack_parser, "interpolation_mode"
+        )
+        put_default_from_argparser_if_not_present(args, image_stack_parser, "start_z")
+        put_default_from_argparser_if_not_present(args, image_stack_parser, "jobs")
+        put_default_from_argparser_if_not_present(
+            args, image_stack_parser, "distribution_strategy"
+        )
+        put_default_from_argparser_if_not_present(
+            args, image_stack_parser, "job_resources"
+        )
+        put_default_from_argparser_if_not_present(args, image_stack_parser, "pad")
+        put_default_from_argparser_if_not_present(args, image_stack_parser, "max_mag")
+        put_default_from_argparser_if_not_present(
+            args, image_stack_parser, "no_compress"
+        )
+        put_default_from_argparser_if_not_present(args, image_stack_parser, "isotropic")
+        put_default_from_argparser_if_not_present(args, image_stack_parser, "verbose")
+
+        # detect layer and ds name
+        (
+            dataset_name,
+            layer_path_to_name,
+        ) = self.detect_dataset_name_and_layer_path_to_layer_name()
+        put_default_if_not_present(args, "name", dataset_name)
+
+        for layer_path, layer_name in layer_path_to_name.items():
+            args.layer_name = layer_name
+            args.source_path = layer_path
+            cube_image_stack(
+                args.source_path,
+                args.target_path,
+                args.layer_name,
+                args.batch_size if "batch_size" in args else None,
+                args,
+            )
+
+        return True
+
+    def detect_dataset_name_and_layer_path_to_layer_name(
+        self,
+    ) -> Tuple[str, Dict[str, str]]:
+        # path format is (.../)(dataset_name/)(layer_name/)file_name.ending
+        traversal_depth = self.check_path_length_and_set_prefix()
+
+        starts_with_prefix = self.prefix != ""
+        if starts_with_prefix:
+            traversal_depth = traversal_depth - 1
+
+        if traversal_depth == 1:
+            self.apply_handle_function(self.handle_path_length_1, starts_with_prefix)
+        elif traversal_depth == 2:
+            self.apply_handle_function(self.handle_path_length_2, starts_with_prefix)
+        else:
+            self.apply_handle_function(
+                self.handle_path_length_longer, starts_with_prefix
+            )
+
+        assert (
+            len(self.dataset_names) == 1
+        ), "More than one dataset found. Stopping conversion..."
+        assert (
+            len(self.layer_path_to_layer_name) > 0
+        ), "No layers found. Stopping conversion..."
+
+        return self.dataset_names.pop(), self.layer_path_to_layer_name
+
+    def handle_path_length_1(
+        self,
+        split_path: List[str],
+    ) -> None:
+        if len(self.source_files) == 1:
+            self.dataset_names.add(path.splitext(split_path[0])[0])
+            self.layer_path_to_layer_name[self.prefix + split_path[0]] = "color"
+        else:
+            self.dataset_names.add("dataset")
+            self.layer_path_to_layer_name[self.prefix] = "color"
+
+    def handle_path_length_2(
+        self,
+        split_path: List[str],
+    ) -> None:
+        if split_path[0] in ["color", "segmentation", "mask"]:
+            layer_name = split_path[0]
+            self.dataset_names.add("dataset")
+        else:
+            self.dataset_names.add(split_path[0])
+            if len(self.source_files) == 1:
+                layer_name = path.splitext(split_path[1])[0]
+            else:
+                layer_name = "color"
+
+        if len(self.source_files) == 1:
+            self.layer_path_to_layer_name[self.source_files[0]] = layer_name
+        else:
+            self.layer_path_to_layer_name[self.prefix + split_path[0]] = layer_name
+
+    def handle_path_length_longer(
+        self,
+        split_path: List[str],
+    ) -> None:
+        self.dataset_names.add(split_path[-3])
+        if len(self.source_files) == 1:
+            self.layer_path_to_layer_name[self.source_files[0]] = split_path[-2]
+        else:
+            self.layer_path_to_layer_name[
+                self.prefix + sep.join(split_path[0:-1])
+            ] = split_path[-2]
+
+
+class ConverterManager:
+    def __init__(self) -> None:
+        self.converter: List[Converter] = [
+            WkwConverter(),
+            NiftiConverter(),
+            KnossosConverter(),
+            ImageStackConverter(),
+        ]
+
+
+def main(args: Namespace) -> None:
+    converter_manager = ConverterManager()
+
+    matching_converters = list(
+        filter(
+            lambda c: c.accepts_input(args.source_path),
+            converter_manager.converter,
+        )
+    )
+
+    if len(matching_converters) == 0:
+        logger.info("No converter found. Please check the source path.")
+        exit(1)
+    elif len(matching_converters) > 1:
+        logger.info(
+            "Multiple converters found. Check if your source path contains multiple datasets."
+        )
+        exit(1)
+
+    should_write_metadata = matching_converters[0].convert_input(args)
+    if should_write_metadata:
+        write_webknossos_metadata(args.target_path, args.name, args.scale)
+
+
+if __name__ == "__main__":
+    parsed_args = create_parser().parse_args()
+    setup_logging(parsed_args)
+
+    main(parsed_args)
diff --git a/wkcuber/cubing.py b/wkcuber/cubing.py
index d13358b34..9d866ab4d 100644
--- a/wkcuber/cubing.py
+++ b/wkcuber/cubing.py
@@ -1,11 +1,12 @@
 import time
 import logging
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
 import numpy as np
 import wkw
 from argparse import ArgumentParser, Namespace
 from os import path
+from pathlib import Path
 from natsort import natsorted
 
 from .mag import Mag
@@ -17,8 +18,8 @@ from .utils import (
     get_chunks,
     find_files,
-    add_verbose_flag,
     add_batch_size_flag,
+    add_verbose_flag,
     open_wkw,
     ensure_wkw,
     WkwDatasetInfo,
@@ -58,7 +59,7 @@ def create_parser() -> ArgumentParser:
         "--dtype",
         "-d",
         help="Target datatype (e.g. uint8, uint16, uint32)",
-        default="uint8",
+        default=None,
     )
 
     parser.add_argument(
@@ -95,10 +96,13 @@ def create_parser() -> ArgumentParser:
 
 
 def find_source_filenames(source_path: str) -> List[str]:
-    # Find all files in a folder that have a matching file extension
-    source_files = list(
-        find_files(path.join(source_path, "*"), image_reader.readers.keys())
-    )
+    # Find all source files that have a matching file extension
+
+    if Path(source_path).is_dir():
+        source_path = path.join(source_path, "*")
+
+    source_files = list(find_files(source_path, image_reader.readers.keys()))
+
     assert len(source_files) > 0, (
         "No image files found in path "
         + source_path
@@ -106,6 +110,7 @@ def find_source_filenames(source_path: str) -> List[str]:
         + str(image_reader.readers.keys())
        + "."
     )
+
    return natsorted(source_files)
 
 
@@ -233,8 +238,7 @@ def cubing(
     source_path: str,
     target_path: str,
     layer_name: str,
-    dtype: str,
-    batch_size: int,
+    batch_size: Optional[int],
     args: Namespace,
 ) -> dict:
     source_files = find_source_filenames(source_path)
@@ -251,13 +255,19 @@ def cubing(
     else:
         num_z = len(source_files)
 
+    if not hasattr(args, "dtype") or args.dtype is None:
+        args.dtype = image_reader.read_dtype(source_files[0])
+
+    if batch_size is None:
+        batch_size = BLOCK_LEN
+
     target_mag = Mag(args.target_mag)
     target_wkw_info = WkwDatasetInfo(
         target_path,
         layer_name,
         target_mag,
         wkw.Header(
-            convert_element_class_to_dtype(dtype),
+            convert_element_class_to_dtype(args.dtype),
             num_channels,
             file_len=args.wkw_file_len,
         ),
@@ -316,7 +326,6 @@ def cubing(
         args.source_path,
         args.target_path,
         args.layer_name,
-        args.dtype,
         args.batch_size,
         args=args,
     )
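The net effect of the cubing.py changes above, in isolation: `--dtype` now defaults to `None`, and an unset dtype is sampled from the first source image via the `read_dtype` machinery added to image_readers.py below. A sketch with a hypothetical input stack:

```python
from argparse import Namespace

from wkcuber.image_readers import image_reader

args = Namespace(dtype=None)  # i.e. --dtype was not passed
source_files = ["testdata/tiff/test.0000.tiff"]  # hypothetical input stack

if not hasattr(args, "dtype") or args.dtype is None:
    args.dtype = image_reader.read_dtype(source_files[0])
print(args.dtype)  # e.g. "uint8", as reported by the matching reader
```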
diff --git a/wkcuber/downsampling_utils.py b/wkcuber/downsampling_utils.py
index 304659443..dfed8a818 100644
--- a/wkcuber/downsampling_utils.py
+++ b/wkcuber/downsampling_utils.py
@@ -70,9 +70,9 @@ def parse_interpolation_mode(
 ) -> InterpolationModes:
     if interpolation_mode.upper() == "DEFAULT":
         return (
-            InterpolationModes.MEDIAN
-            if layer_name == "color"
-            else InterpolationModes.MODE
+            InterpolationModes.MODE
+            if layer_name == "segmentation"
+            else InterpolationModes.MEDIAN
         )
     else:
         return InterpolationModes[interpolation_mode.upper()]
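The behavioural consequence of this inversion: under `"default"`, MEDIAN used to be reserved for layers literally named `color`; now every layer except `segmentation` gets MEDIAN, and only segmentation keeps the voting MODE filter. A check of the new mapping, assuming both names are importable from the module:

```python
from wkcuber.downsampling_utils import InterpolationModes, parse_interpolation_mode

assert parse_interpolation_mode("default", "color") == InterpolationModes.MEDIAN
# previously MODE, now MEDIAN: the relevant change, e.g. for "mask" layers
assert parse_interpolation_mode("default", "mask") == InterpolationModes.MEDIAN
assert parse_interpolation_mode("default", "segmentation") == InterpolationModes.MODE
```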
diff --git a/wkcuber/image_readers.py b/wkcuber/image_readers.py
index 847892972..b5b4507c9 100644
--- a/wkcuber/image_readers.py
+++ b/wkcuber/image_readers.py
@@ -28,6 +28,9 @@ def read_z_slices_per_file(
     ) -> int:
         return 1
 
+    def read_dtype(self, file_name: str) -> str:
+        raise NotImplementedError()
+
 
 class PillowImageReader(ImageReader):
     def read_array(self, file_name: str, dtype: np.dtype, z_slice: int) -> np.ndarray:
@@ -49,6 +52,9 @@ def read_channel_count(self, file_name: str) -> int:
         else:
             return this_layer.shape[-1]  # pylint: disable=unsubscriptable-object
 
+    def read_dtype(self, file_name: str) -> str:
+        return np.array(Image.open(file_name)).dtype.name
+
 
 def to_target_datatype(data: np.ndarray, target_dtype: np.dtype) -> np.ndarray:
     factor = (1 + np.iinfo(data.dtype).max) / (1 + np.iinfo(target_dtype).max)
@@ -71,6 +77,9 @@ def read_channel_count(self, _file_name: str) -> int:
         logging.info("Assuming single channel for DM3 data")
         return 1
 
+    def read_dtype(self, file_name: str) -> str:
+        return DM3(file_name).imagedata.dtype.name
+
 
 class Dm4ImageReader(ImageReader):
     def _read_tags(self, dm4file: DM4File) -> Tuple[DM4File.DM4TagDir, DM4TagHeader]:
@@ -122,6 +131,10 @@ def read_channel_count(self, _file_name: str) -> int:
         logging.info("Assuming single channel for DM4 data")
         return 1
 
+    def read_dtype(self, file_name: str) -> str:  # pylint: disable=unused-argument
+        # DM4 standard input type is uint16
+        return "uint16"
+
 
 def find_count_of_axis(tif_file: TiffFile, axis: str) -> int:
     assert len(tif_file.series) == 1, "only single tif series are supported"
@@ -179,6 +192,12 @@ def read_z_slices_per_file(self, file_name: str) -> int:
         with TiffFile(file_name) as tif_file:
             return find_count_of_axis(tif_file, "Z")
 
+    def read_dtype(self, file_name: str) -> str:
+        with TiffFile(file_name) as tif_file:
+            return tif_file.series[  # pylint: disable=unsubscriptable-object
+                0
+            ].dtype.name
+
 
 class ImageReaderManager:
     def __init__(self) -> None:
@@ -218,5 +237,9 @@ def read_z_slices_per_file(self, file_name: str) -> int:
         _, ext = path.splitext(file_name)
         return self.readers[ext].read_z_slices_per_file(file_name)
 
+    def read_dtype(self, file_name: str) -> str:
+        _, ext = path.splitext(file_name)
+        return self.readers[ext].read_dtype(file_name)
+
 
 image_reader = ImageReaderManager()
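Usage of the new dtype-detection API: the manager dispatches on the file extension, so every supported stack type answers the same question. The file names here are hypothetical, and the DM4 reader returns a fixed `"uint16"` as commented above:

```python
from wkcuber.image_readers import image_reader

print(image_reader.read_dtype("slice_0001.tif"))  # e.g. "uint16", via tifffile
print(image_reader.read_dtype("photo.png"))       # e.g. "uint8", via Pillow
print(image_reader.read_dtype("scan.dm4"))        # always "uint16" (fixed assumption)
```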
diff --git a/wkcuber/tile_cubing.py b/wkcuber/tile_cubing.py
index a64dabb90..71fa4e062 100644
--- a/wkcuber/tile_cubing.py
+++ b/wkcuber/tile_cubing.py
@@ -154,7 +154,6 @@ def find_file_with_dimensions(
     z_value: int,
     decimal_lengths: Dict[str, int],
 ) -> Union[str, None]:
-
     file_path_unpadded = replace_coordinates(
         file_path_pattern, {"z": (z_value, 0), "y": (y_value, 0), "x": (x_value, 0)}
     )
@@ -264,7 +263,6 @@ def tile_cubing_job(
 def tile_cubing(
     target_path: str,
     layer_name: str,
-    dtype: str,
     batch_size: int,
     input_path_pattern: str,
     args: Namespace = None,
@@ -291,6 +289,10 @@ def tile_cubing(
             file_count, tile_size[0], tile_size[1]
         )
     )
+    if args is None or not hasattr(args, "dtype") or args.dtype is None:
+        dtype = image_reader.read_dtype(arbitrary_file)
+    else:
+        dtype = args.dtype
 
     target_wkw_info = WkwDatasetInfo(
         target_path,
@@ -341,7 +341,6 @@ def create_parser() -> ArgumentParser:
     tile_cubing(
         args.target_path,
         args.layer_name,
-        args.dtype,
         int(args.batch_size),
         input_path_pattern,
         args,
diff --git a/wkcuber/utils.py b/wkcuber/utils.py
index 5680705aa..e5061ebe4 100644
--- a/wkcuber/utils.py
+++ b/wkcuber/utils.py
@@ -45,7 +45,6 @@ KnossosDatasetInfo = namedtuple("KnossosDatasetInfo", ("dataset_path", "dtype"))
 
 FallbackArgs = namedtuple("FallbackArgs", ("distribution_strategy", "jobs"))
 
-
 BLOCK_LEN = 32
 DEFAULT_WKW_FILE_LEN = 32
 DEFAULT_WKW_VOXELS_PER_BLOCK = 32
@@ -153,7 +152,7 @@ def find_files(
     return (
         f
         for f in iglob(source_path, recursive=True)
-        if any([f.endswith(suffix) for suffix in extensions])
+        if any([f.lower().endswith(suffix) for suffix in extensions])
     )
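Effect of the find_files tweak in isolation: extension matching is now case-insensitive on the file side, so stacks named e.g. `.TIF` are picked up, as long as the extension sets passed in stay lower-case (only the file name is lowered). A minimal check:

```python
extensions = {".tif", ".tiff"}  # extension sets in wkcuber are lower-case

f = "SLICE_0001.TIF"
assert not any(f.endswith(suffix) for suffix in extensions)      # old behaviour
assert any(f.lower().endswith(suffix) for suffix in extensions)  # new behaviour
```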