From 29821eeabdb06e4ea5d6bf931017c59177910fc2 Mon Sep 17 00:00:00 2001 From: Philipp Otto Date: Fri, 18 Mar 2022 10:47:25 +0100 Subject: [PATCH] Fix downsampling/compression of segmentation layers + misc (#657) * add test for cubing/downsampling/compressing (end-to-end) for a color and segmentation layer * fix downsampling and compressing segmentation layers (cast largest_segment_id to int if necessary and safe; also pass when creating copy of layer) * improve logging (e.g., point out deprecated caller location) and remove some deprecation warnings * upgrade zarr in wkcuber to be in sync with webknossos package * add/clean up ./lint.sh, ./typecheck.sh and ./format.sh for wkcuber * remove isort from wkcuber for now * format * fix typing * update changelog Co-authored-by: Norman Rzepka --- .github/workflows/ci.yml | 6 +-- webknossos/Changelog.md | 2 +- webknossos/webknossos/dataset/layer.py | 12 +++++ webknossos/webknossos/dataset/mag_view.py | 1 + webknossos/webknossos/utils.py | 4 +- wkcuber/format.sh | 8 +++ wkcuber/lint.sh | 4 ++ wkcuber/poetry.lock | 8 +-- wkcuber/pyproject.toml | 2 +- wkcuber/tests/test_main.py | 65 +++++++++++++++++++++++ wkcuber/tests/test_raw_conversion.py | 2 +- wkcuber/typecheck.sh | 4 +- wkcuber/wkcuber/converter.py | 2 +- wkcuber/wkcuber/cubing.py | 4 +- 14 files changed, 108 insertions(+), 16 deletions(-) create mode 100755 wkcuber/format.sh create mode 100755 wkcuber/lint.sh create mode 100644 wkcuber/tests/test_main.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12a558aa3..a4162a191 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -194,15 +194,15 @@ jobs: run: tar -xzvf testdata/WT1_wkw.tar.gz - name: Check formatting - run: poetry run black --check . + run: ./format.sh check if: ${{ needs.changes.outputs.wkcuber == 'true' }} - name: Lint code - run: poetry run pylint -j4 wkcuber + run: ./lint.sh if: ${{ needs.changes.outputs.wkcuber == 'true' }} - name: Check typing - run: poetry run ./typecheck.sh + run: ./typecheck.sh - name: Python tests run: poetry run pytest tests diff --git a/webknossos/Changelog.md b/webknossos/Changelog.md index 3097ab975..c85766235 100644 --- a/webknossos/Changelog.md +++ b/webknossos/Changelog.md @@ -32,7 +32,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section - Dataset: `block_len` and `file_len` attributes are now deprecated, but still available for backwards compatibility. Use `chunk_size` and `chunks_per_shard` instead. These new attributes are `Vec3Int`, so they can be set non-uniformly. However, WKW-backed layers still require uniform `chunk_size` and `chunks_per_shard`. [#627](https://github.com/scalableminds/webknossos-libs/pull/627) ### Fixed - +- Fixed crash during downsampling and compression of segmentation layers. [#657](https://github.com/scalableminds/webknossos-libs/pull/657) ## [0.9.11](https://github.com/scalableminds/webknossos-libs/releases/tag/v0.9.11) - 2022-03-16 [Commits](https://github.com/scalableminds/webknossos-libs/compare/v0.9.10...v0.9.11) diff --git a/webknossos/webknossos/dataset/layer.py b/webknossos/webknossos/dataset/layer.py index 5f29d3445..035d3446c 100644 --- a/webknossos/webknossos/dataset/layer.py +++ b/webknossos/webknossos/dataset/layer.py @@ -998,6 +998,9 @@ def dtype_per_layer(self) -> str: self.dtype_per_channel, self.num_channels ) + def _get_largest_segment_id_maybe(self) -> Optional[int]: + return None + class SegmentationLayer(Layer): @@ -1009,9 +1012,18 @@ def largest_segment_id(self) -> int: @largest_segment_id.setter def largest_segment_id(self, largest_segment_id: int) -> None: + if type(largest_segment_id) != int: + assert largest_segment_id == int( + largest_segment_id + ), f"A non-integer value was passed for largest_segment_id ({largest_segment_id})." + largest_segment_id = int(largest_segment_id) + self._properties.largest_segment_id = largest_segment_id self.dataset._export_as_json() @property def category(self) -> LayerCategoryType: return SEGMENTATION_CATEGORY + + def _get_largest_segment_id_maybe(self) -> Optional[int]: + return self.largest_segment_id diff --git a/webknossos/webknossos/dataset/mag_view.py b/webknossos/webknossos/dataset/mag_view.py index 0f12dd7b8..a54e12a9b 100644 --- a/webknossos/webknossos/dataset/mag_view.py +++ b/webknossos/webknossos/dataset/mag_view.py @@ -289,6 +289,7 @@ def compress( dtype_per_channel=self.layer.dtype_per_channel, num_channels=self.layer.num_channels, data_format=self.layer.data_format, + largest_segment_id=self.layer._get_largest_segment_id_maybe(), ).get_or_add_mag( mag=self.mag, chunk_size=self.info.chunk_size, diff --git a/webknossos/webknossos/utils.py b/webknossos/webknossos/utils.py index 1b9c4e948..7d6d0bfff 100644 --- a/webknossos/webknossos/utils.py +++ b/webknossos/webknossos/utils.py @@ -9,6 +9,7 @@ from concurrent.futures import as_completed from concurrent.futures._base import Future from datetime import datetime +from inspect import getframeinfo, stack from multiprocessing import cpu_count from os.path import relpath from pathlib import Path @@ -199,7 +200,8 @@ def get_rich_progress() -> Progress: def warn_deprecated(deprecated_item: str, alternative_item: str) -> None: + caller = getframeinfo(stack()[2][0]) warnings.warn( - f"[DEPRECATION] `{deprecated_item}` is deprecated, please use `{alternative_item}` instead.", + f"[DEPRECATION] `{deprecated_item}` is deprecated, please use `{alternative_item}` instead (see {caller.filename}:{caller.lineno})", DeprecationWarning, ) diff --git a/wkcuber/format.sh b/wkcuber/format.sh new file mode 100755 index 000000000..91b356252 --- /dev/null +++ b/wkcuber/format.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -eEuo pipefail + +if [ $# -eq 1 ] && [ "$1" = "check" ]; then + poetry run black --check . +else + poetry run black . +fi diff --git a/wkcuber/lint.sh b/wkcuber/lint.sh new file mode 100755 index 000000000..89548a2d8 --- /dev/null +++ b/wkcuber/lint.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -eEuo pipefail + +poetry run pylint -j4 wkcuber \ No newline at end of file diff --git a/wkcuber/poetry.lock b/wkcuber/poetry.lock index 5ee878c25..a44fbc260 100644 --- a/wkcuber/poetry.lock +++ b/wkcuber/poetry.lock @@ -1121,7 +1121,7 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" [[package]] name = "zarr" -version = "2.10.3" +version = "2.11.1" description = "An implementation of chunked, compressed, N-dimensional arrays for Python." category = "main" optional = false @@ -1151,7 +1151,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.7,>=3.7.1" -content-hash = "108f4e0afeae30d19508bbc9dc0a7486ce5db4126cb2ebe28fbfcca9b2bdd80f" +content-hash = "c305b92a4b584885b5a455422cc48b34b66b8dd6dde157584d589b6430a25adb" [metadata.files] anyio = [ @@ -2149,8 +2149,8 @@ wrapt = [ {file = "wrapt-1.13.3.tar.gz", hash = "sha256:1fea9cd438686e6682271d36f3481a9f3636195578bab9ca3382e2f5f01fc185"}, ] zarr = [ - {file = "zarr-2.10.3-py3-none-any.whl", hash = "sha256:1354d6de15683a3f7ea9c47e7bfa5772da445d25298988bacc8e499db8896186"}, - {file = "zarr-2.10.3.tar.gz", hash = "sha256:76932665c2146ebdf15f6dba254f9e0030552fbfcf9322dea822bff96fbce693"}, + {file = "zarr-2.11.1-py3-none-any.whl", hash = "sha256:126cf3fe6d0276f64a1590eb6e18edf5e7c903cc4a879829b3ebdc85238b7894"}, + {file = "zarr-2.11.1.tar.gz", hash = "sha256:11b628f42dec36e0147879e8bd471524b59b238094b9b21e3c35be78399c115e"}, ] zipp = [ {file = "zipp-3.7.0-py3-none-any.whl", hash = "sha256:b47250dd24f92b7dd6a0a8fc5244da14608f3ca90a5efcd37a3b1642fac9a375"}, diff --git a/wkcuber/pyproject.toml b/wkcuber/pyproject.toml index 5c9aa209b..277e5ea80 100644 --- a/wkcuber/pyproject.toml +++ b/wkcuber/pyproject.toml @@ -28,7 +28,7 @@ scipy = "^1.6.0" tifffile = "^2020.11.26" webknossos = { path = "../webknossos/", develop = true } wkw = "1.1.11" -zarr = "^2.10.3" +zarr = "^2.11.0" [tool.poetry.dev-dependencies] black = "^20.8b1" diff --git a/wkcuber/tests/test_main.py b/wkcuber/tests/test_main.py new file mode 100644 index 000000000..df3c6a663 --- /dev/null +++ b/wkcuber/tests/test_main.py @@ -0,0 +1,65 @@ +from pathlib import Path +import numpy as np +import pytest +from wkcuber.utils import ( + setup_logging, +) +from webknossos import Dataset +from wkcuber.__main__ import create_parser, cube_with_args +from tifffile import TiffWriter + +TESTOUTPUT_DIR = Path("testoutput") + + +@pytest.mark.parametrize("category", ["color", "segmentation"]) +def test_main(category: str) -> None: + input_folder = TESTOUTPUT_DIR / "raw_dataset" / category + input_folder.mkdir(parents=True, exist_ok=True) + + raw_file = input_folder / "input.tif" + + input_dtype = "uint32" + shape = 64, 128, 256 + data = np.arange(np.prod(shape), dtype=input_dtype).reshape(shape) + with TiffWriter(raw_file) as tif: + tif.write(data.transpose([2, 1, 0])) + + output_path = TESTOUTPUT_DIR / "output_2" + output_path.mkdir() + + args_list = [ + str(TESTOUTPUT_DIR / "raw_dataset"), + str(output_path), + "--jobs", + "1", + "--scale", + "11,11,11", + "--max_mag", + "4", + ] + + args = create_parser().parse_args(args_list) + cube_with_args(args) + + dataset = Dataset.open(output_path) + if category == "color": + layer = dataset.get_color_layers()[0] + else: + layer = dataset.get_segmentation_layers()[0] + mag_view = layer.get_mag(1) + view = mag_view.get_view() + read_data = view.read() + + assert view.size == shape + assert view.get_dtype() == data.dtype + assert np.array_equal( + read_data[0], + data, + ) + + +if __name__ == "__main__": + from argparse import Namespace + + setup_logging(Namespace(verbose=False)) + test_main("color") diff --git a/wkcuber/tests/test_raw_conversion.py b/wkcuber/tests/test_raw_conversion.py index e63aa2040..a32b001a1 100644 --- a/wkcuber/tests/test_raw_conversion.py +++ b/wkcuber/tests/test_raw_conversion.py @@ -43,7 +43,7 @@ def test_main(order: str, flip_axes: Optional[Tuple[int, int]]) -> None: main(args) dataset = Dataset.open(output_path) - layer = dataset.get_color_layer() + layer = dataset.get_color_layers()[0] mag_view = layer.get_mag(1) view = mag_view.get_view() read_data = view.read() diff --git a/wkcuber/typecheck.sh b/wkcuber/typecheck.sh index 1f8f7876f..c17eb79a6 100755 --- a/wkcuber/typecheck.sh +++ b/wkcuber/typecheck.sh @@ -2,7 +2,7 @@ set -eEuo pipefail echo "Typecheck wkcuber module..." -python -m mypy -p wkcuber --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional +poetry run python -m mypy -p wkcuber --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional echo "Typecheck tests..." -python -m mypy -p tests --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional +poetry run python -m mypy -p tests --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional diff --git a/wkcuber/wkcuber/converter.py b/wkcuber/wkcuber/converter.py index 1d94ca5d8..5f2aee1df 100644 --- a/wkcuber/wkcuber/converter.py +++ b/wkcuber/wkcuber/converter.py @@ -541,7 +541,7 @@ def main(args: Namespace) -> None: exit(1) elif len(matching_converters) > 1: logger.info( - "Multiple converters found. Check if your source path contains multiple datasets." + f"Multiple converters found. Check if your source path contains multiple datasets. Converters: {matching_converters}" ) exit(1) diff --git a/wkcuber/wkcuber/cubing.py b/wkcuber/wkcuber/cubing.py index 56850fe61..8021215c0 100644 --- a/wkcuber/wkcuber/cubing.py +++ b/wkcuber/wkcuber/cubing.py @@ -220,7 +220,7 @@ def cubing_job( # Image shape will be (x, y, channel_count, z=1) image = read_image_file( file_name, - target_view.header.voxel_type, + target_view.info.voxel_type, z, channel_index, sample_index, @@ -378,7 +378,7 @@ def cubing( ) target_mag_view = target_layer.get_or_add_mag( - target_mag, file_len=wkw_file_len, block_len=BLOCK_LEN + target_mag, chunks_per_shard=wkw_file_len, chunk_size=BLOCK_LEN ) interpolation_mode = parse_interpolation_mode(