Skip to content

Commit

Permalink
Fix downsampling/compression of segmentation layers + misc (#657)
Browse files Browse the repository at this point in the history
* add test for cubing/downsampling/compressing (end-to-end) for a color and segmentation layer

* fix downsampling and compressing segmentation layers (cast largest_segment_id to int if necessary and safe; also pass when creating copy of layer)

* improve logging (e.g., point out deprecated caller location) and remove some deprecation warnings

* upgrade zarr in wkcuber to be in sync with webknossos package

* add/clean up ./lint.sh, ./typecheck.sh and ./format.sh for wkcuber

* remove isort from wkcuber for now

* format

* fix typing

* update changelog

Co-authored-by: Norman Rzepka <[email protected]>
  • Loading branch information
philippotto and normanrz authored Mar 18, 2022
1 parent f90ad7b commit 29821ee
Show file tree
Hide file tree
Showing 14 changed files with 108 additions and 16 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,15 +194,15 @@ jobs:
run: tar -xzvf testdata/WT1_wkw.tar.gz

- name: Check formatting
run: poetry run black --check .
run: ./format.sh check
if: ${{ needs.changes.outputs.wkcuber == 'true' }}

- name: Lint code
run: poetry run pylint -j4 wkcuber
run: ./lint.sh
if: ${{ needs.changes.outputs.wkcuber == 'true' }}

- name: Check typing
run: poetry run ./typecheck.sh
run: ./typecheck.sh

- name: Python tests
run: poetry run pytest tests
Expand Down
2 changes: 1 addition & 1 deletion webknossos/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ For upgrade instructions, please check the respective *Breaking Changes* section
- Dataset: `block_len` and `file_len` attributes are now deprecated, but still available for backwards compatibility. Use `chunk_size` and `chunks_per_shard` instead. These new attributes are `Vec3Int`, so they can be set non-uniformly. However, WKW-backed layers still require uniform `chunk_size` and `chunks_per_shard`. [#627](https://github.com/scalableminds/webknossos-libs/pull/627)

### Fixed

- Fixed crash during downsampling and compression of segmentation layers. [#657](https://github.com/scalableminds/webknossos-libs/pull/657)

## [0.9.11](https://github.com/scalableminds/webknossos-libs/releases/tag/v0.9.11) - 2022-03-16
[Commits](https://github.com/scalableminds/webknossos-libs/compare/v0.9.10...v0.9.11)
Expand Down
12 changes: 12 additions & 0 deletions webknossos/webknossos/dataset/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,6 +998,9 @@ def dtype_per_layer(self) -> str:
self.dtype_per_channel, self.num_channels
)

def _get_largest_segment_id_maybe(self) -> Optional[int]:
    """Return this layer's largest segment id, if it has one.

    Base implementation for plain (e.g. color) layers: there is no
    largest segment id, so None is returned. ``SegmentationLayer``
    overrides this to return its actual ``largest_segment_id``.
    """
    return None


class SegmentationLayer(Layer):

Expand All @@ -1009,9 +1012,18 @@ def largest_segment_id(self) -> int:

@largest_segment_id.setter
def largest_segment_id(self, largest_segment_id: int) -> None:
    """Set the layer's largest segment id and persist it to the dataset JSON.

    Accepts integer-like values (e.g. numpy integer scalars or whole
    floats) by casting them to ``int``; values that would lose precision
    in the cast are rejected.
    """
    # `type(...) is not int` keeps the exact-type semantics of the
    # original check (idiomatic form of the former `type(...) != int`);
    # isinstance would wrongly skip the cast for numpy scalars' subtypes.
    if type(largest_segment_id) is not int:
        # NOTE(review): validation via assert is skipped under `python -O`.
        assert largest_segment_id == int(
            largest_segment_id
        ), f"A non-integer value was passed for largest_segment_id ({largest_segment_id})."
        largest_segment_id = int(largest_segment_id)

    self._properties.largest_segment_id = largest_segment_id
    self.dataset._export_as_json()

@property
def category(self) -> LayerCategoryType:
    """The layer's category; always ``SEGMENTATION_CATEGORY`` for this class."""
    return SEGMENTATION_CATEGORY

def _get_largest_segment_id_maybe(self) -> Optional[int]:
    """Return this segmentation layer's largest segment id (overrides the
    base implementation, which returns None for non-segmentation layers)."""
    return self.largest_segment_id
1 change: 1 addition & 0 deletions webknossos/webknossos/dataset/mag_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ def compress(
dtype_per_channel=self.layer.dtype_per_channel,
num_channels=self.layer.num_channels,
data_format=self.layer.data_format,
largest_segment_id=self.layer._get_largest_segment_id_maybe(),
).get_or_add_mag(
mag=self.mag,
chunk_size=self.info.chunk_size,
Expand Down
4 changes: 3 additions & 1 deletion webknossos/webknossos/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from concurrent.futures import as_completed
from concurrent.futures._base import Future
from datetime import datetime
from inspect import getframeinfo, stack
from multiprocessing import cpu_count
from os.path import relpath
from pathlib import Path
Expand Down Expand Up @@ -199,7 +200,8 @@ def get_rich_progress() -> Progress:


def warn_deprecated(deprecated_item: str, alternative_item: str) -> None:
    """Emit a DeprecationWarning that points users from *deprecated_item*
    to *alternative_item*, including the caller's source location in the
    message so deprecated usages are easy to find.
    """
    # stack()[0] is this function, stack()[1] the deprecated wrapper that
    # called us, so stack()[2] is the frame of the actual deprecated usage.
    caller = getframeinfo(stack()[2][0])
    warnings.warn(
        f"[DEPRECATION] `{deprecated_item}` is deprecated, "
        f"please use `{alternative_item}` instead "
        f"(see {caller.filename}:{caller.lineno})",
        DeprecationWarning,
    )
8 changes: 8 additions & 0 deletions wkcuber/format.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
set -eEuo pipefail

# Format the code base with black.
# Usage: ./format.sh          -> reformat files in place
#        ./format.sh check    -> only verify formatting (CI mode)
mode="${1:-}"
if [ $# -eq 1 ] && [ "$mode" = "check" ]; then
    poetry run black --check .
else
    poetry run black .
fi
4 changes: 4 additions & 0 deletions wkcuber/lint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
set -eEuo pipefail

# Lint the wkcuber package with pylint (-j4 runs four worker processes).
poetry run pylint -j4 wkcuber
8 changes: 4 additions & 4 deletions wkcuber/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion wkcuber/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ scipy = "^1.6.0"
tifffile = "^2020.11.26"
webknossos = { path = "../webknossos/", develop = true }
wkw = "1.1.11"
zarr = "^2.10.3"
zarr = "^2.11.0"

[tool.poetry.dev-dependencies]
black = "^20.8b1"
Expand Down
65 changes: 65 additions & 0 deletions wkcuber/tests/test_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from pathlib import Path
import numpy as np
import pytest
from wkcuber.utils import (
setup_logging,
)
from webknossos import Dataset
from wkcuber.__main__ import create_parser, cube_with_args
from tifffile import TiffWriter

TESTOUTPUT_DIR = Path("testoutput")


@pytest.mark.parametrize("category", ["color", "segmentation"])
def test_main(category: str) -> None:
    """End-to-end test: write a tiff stack, cube it with wkcuber, and
    verify the resulting dataset layer for the given category.

    Uses category-specific input and output roots so the two parametrized
    runs cannot collide: a shared output directory would make the second
    run's mkdir() raise FileExistsError, and a shared raw-dataset root
    would expose both categories' inputs to the converter at once.
    """
    raw_dataset_root = TESTOUTPUT_DIR / f"raw_dataset_{category}"
    input_folder = raw_dataset_root / category
    input_folder.mkdir(parents=True, exist_ok=True)

    raw_file = input_folder / "input.tif"

    input_dtype = "uint32"
    shape = 64, 128, 256
    data = np.arange(np.prod(shape), dtype=input_dtype).reshape(shape)
    # tifffile writes pages in (z, y, x) order, hence the transpose.
    with TiffWriter(raw_file) as tif:
        tif.write(data.transpose([2, 1, 0]))

    output_path = TESTOUTPUT_DIR / f"output_{category}"
    output_path.mkdir(parents=True, exist_ok=True)

    args_list = [
        str(raw_dataset_root),
        str(output_path),
        "--jobs",
        "1",
        "--scale",
        "11,11,11",
        "--max_mag",
        "4",
    ]

    args = create_parser().parse_args(args_list)
    cube_with_args(args)

    dataset = Dataset.open(output_path)
    if category == "color":
        layer = dataset.get_color_layers()[0]
    else:
        layer = dataset.get_segmentation_layers()[0]
    mag_view = layer.get_mag(1)
    view = mag_view.get_view()
    read_data = view.read()

    assert view.size == shape
    assert view.get_dtype() == data.dtype
    # Channel 0 of the cubed mag-1 data must match the raw input volume.
    assert np.array_equal(
        read_data[0],
        data,
    )


if __name__ == "__main__":
    # Allow running this test directly as a script (outside of pytest).
    from argparse import Namespace

    setup_logging(Namespace(verbose=False))
    test_main("color")
2 changes: 1 addition & 1 deletion wkcuber/tests/test_raw_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_main(order: str, flip_axes: Optional[Tuple[int, int]]) -> None:
main(args)

dataset = Dataset.open(output_path)
layer = dataset.get_color_layer()
layer = dataset.get_color_layers()[0]
mag_view = layer.get_mag(1)
view = mag_view.get_view()
read_data = view.read()
Expand Down
4 changes: 2 additions & 2 deletions wkcuber/typecheck.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set -eEuo pipefail

echo "Typecheck wkcuber module..."
python -m mypy -p wkcuber --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional
poetry run python -m mypy -p wkcuber --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional

echo "Typecheck tests..."
python -m mypy -p tests --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional
poetry run python -m mypy -p tests --disallow-untyped-defs --show-error-codes --strict-equality --namespace-packages --no-implicit-optional
2 changes: 1 addition & 1 deletion wkcuber/wkcuber/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ def main(args: Namespace) -> None:
exit(1)
elif len(matching_converters) > 1:
logger.info(
"Multiple converters found. Check if your source path contains multiple datasets."
f"Multiple converters found. Check if your source path contains multiple datasets. Converters: {matching_converters}"
)
exit(1)

Expand Down
4 changes: 2 additions & 2 deletions wkcuber/wkcuber/cubing.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def cubing_job(
# Image shape will be (x, y, channel_count, z=1)
image = read_image_file(
file_name,
target_view.header.voxel_type,
target_view.info.voxel_type,
z,
channel_index,
sample_index,
Expand Down Expand Up @@ -378,7 +378,7 @@ def cubing(
)

target_mag_view = target_layer.get_or_add_mag(
target_mag, file_len=wkw_file_len, block_len=BLOCK_LEN
target_mag, chunks_per_shard=wkw_file_len, chunk_size=BLOCK_LEN
)

interpolation_mode = parse_interpolation_mode(
Expand Down

0 comments on commit 29821ee

Please sign in to comment.