From 463fe6ed58d6aa7bfd87bd305506de76d15b6dca Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Tue, 9 Jul 2024 15:01:10 -0400 Subject: [PATCH] Update reported datatypes to conform with specs (#268) - [Zarr V2](https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding) - [Zarr V3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#core-data-types) zarr-python had been reading our datatypes without complaining. Testing by @chris-delg has revealed that TensorStore will not. ## Changes - Link against nlohman::json in examples builds (orthogonal to the issue, but a light lift). - Remove `sample_type_to_dtype()` from common.{h,cpp} - Implement version-specific `sample_type_to_dtype()` - Update tests to check data type is correct. --- src/common.cpp | 16 ---------- src/common.hh | 7 ----- src/zarr.v2.cpp | 31 ++++++++++++++++++- src/zarr.v3.cpp | 26 +++++++++++++++- tests/multiscales-metadata.cpp | 4 +++ tests/repeat-start.cpp | 4 ++- tests/write-zarr-v2-compressed-multiscale.cpp | 4 +++ ...-compressed-with-chunking-and-rollover.cpp | 4 +++ ...write-zarr-v2-compressed-with-chunking.cpp | 4 +++ ...-raw-chunk-size-larger-than-frame-size.cpp | 4 +++ ...-raw-multiscale-with-trivial-tile-size.cpp | 4 +++ tests/write-zarr-v2-raw-multiscale.cpp | 4 +++ ...v2-raw-with-even-chunking-and-rollover.cpp | 4 +++ .../write-zarr-v2-raw-with-even-chunking.cpp | 4 +++ ...write-zarr-v2-raw-with-ragged-chunking.cpp | 4 +++ tests/write-zarr-v2-raw.cpp | 5 +++ tests/write-zarr-v2-with-lz4-compression.cpp | 4 +++ tests/write-zarr-v2-with-zstd-compression.cpp | 4 +++ tests/write-zarr-v3-compressed.cpp | 2 +- .../write-zarr-v3-raw-chunk-exceeds-array.cpp | 2 +- ...write-zarr-v3-raw-with-ragged-sharding.cpp | 2 +- tests/write-zarr-v3-raw.cpp | 2 +- 22 files changed, 115 insertions(+), 30 deletions(-) diff --git a/src/common.cpp b/src/common.cpp index d79ebd30..597f8764 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1,7 +1,4 @@ #include "common.hh" -#include "zarr.hh" - -#include "platform.h" #include #include @@ -199,19 +196,6 @@ common::bytes_per_chunk(const std::vector& dimensions, return n_bytes; } -const char* -common::sample_type_to_dtype(SampleType t) - -{ - static const char* table[] = { "u1", "u2", "i1", "i2", - "f4", "u2", "u2", "u2" }; - if (t < countof(table)) { - return table[t]; - } else { - throw std::runtime_error("Invalid sample type."); - } -} - const char* common::sample_type_to_string(SampleType t) noexcept { diff --git a/src/common.hh b/src/common.hh index 2e10efe2..b1afe285 100644 --- a/src/common.hh +++ b/src/common.hh @@ -131,13 +131,6 @@ size_t bytes_per_chunk(const std::vector& dimensions, const SampleType& dtype); -/// @brief Get the Zarr dtype for a given SampleType. -/// @param t An enumerated sample type. -/// @throw std::runtime_error if @par t is not a valid SampleType. -/// @return A representation of the SampleType @par t expected by a Zarr reader. -const char* -sample_type_to_dtype(SampleType t); - /// @brief Get a string representation of the SampleType enum. /// @param t An enumerated sample type. /// @return A human-readable representation of the SampleType @par t. diff --git a/src/zarr.v2.cpp b/src/zarr.v2.cpp index f877c628..175ab9be 100644 --- a/src/zarr.v2.cpp +++ b/src/zarr.v2.cpp @@ -4,6 +4,8 @@ #include "nlohmann/json.hpp" +#include + namespace zarr = acquire::sink::zarr; namespace { @@ -22,6 +24,33 @@ compressed_zarr_v2_init() } return nullptr; } + +std::string +sample_type_to_dtype(SampleType t) + +{ + const std::string dtype_prefix = + std::endian::native == std::endian::big ? ">" : "<"; + + switch (t) { + case SampleType_u8: + return dtype_prefix + "u1"; + case SampleType_u10: + case SampleType_u12: + case SampleType_u14: + case SampleType_u16: + return dtype_prefix + "u2"; + case SampleType_i8: + return dtype_prefix + "i1"; + case SampleType_i16: + return dtype_prefix + "i2"; + case SampleType_f32: + return dtype_prefix + "f4"; + default: + throw std::runtime_error("Invalid SampleType: " + + std::to_string(static_cast(t))); + } +} } // end ::{anonymous} namespace /// ZarrV2 @@ -249,7 +278,7 @@ zarr::ZarrV2::write_array_metadata_(size_t level) const metadata["zarr_format"] = 2; metadata["shape"] = array_shape; metadata["chunks"] = chunk_shape; - metadata["dtype"] = common::sample_type_to_dtype(image_shape.type); + metadata["dtype"] = sample_type_to_dtype(image_shape.type); metadata["fill_value"] = 0; metadata["order"] = "C"; metadata["filters"] = nullptr; diff --git a/src/zarr.v3.cpp b/src/zarr.v3.cpp index 311753e5..840a2c35 100644 --- a/src/zarr.v3.cpp +++ b/src/zarr.v3.cpp @@ -24,6 +24,30 @@ compressed_zarr_v3_init() } return nullptr; } + +std::string +sample_type_to_dtype(SampleType t) + +{ + switch (t) { + case SampleType_u8: + return "uint8"; + case SampleType_u10: + case SampleType_u12: + case SampleType_u14: + case SampleType_u16: + return "uint16"; + case SampleType_i8: + return "int8"; + case SampleType_i16: + return "int16"; + case SampleType_f32: + return "float32"; + default: + throw std::runtime_error("Invalid SampleType: " + + std::to_string(static_cast(t))); + } +} } // end ::{anonymous} namespace zarr::ZarrV3::ZarrV3(BloscCompressionParams&& compression_params) @@ -175,7 +199,7 @@ zarr::ZarrV3::write_array_metadata_(size_t level) const }); metadata["chunk_memory_layout"] = "C"; - metadata["data_type"] = common::sample_type_to_dtype(image_shape.type); + metadata["data_type"] = sample_type_to_dtype(image_shape.type); metadata["extensions"] = json::array(); metadata["fill_value"] = 0; metadata["shape"] = array_shape; diff --git a/tests/multiscales-metadata.cpp b/tests/multiscales-metadata.cpp index b1fb4cc4..44c17b4a 100644 --- a/tests/multiscales-metadata.cpp +++ b/tests/multiscales-metadata.cpp @@ -182,6 +182,10 @@ verify_layer(const LayerTestCase& test_case) std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + const auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_layer, shape[0]); ASSERT_EQ(int, "%d", layer_frame_height, shape[1]); diff --git a/tests/repeat-start.cpp b/tests/repeat-start.cpp index cb9c2a49..e65eea9e 100644 --- a/tests/repeat-start.cpp +++ b/tests/repeat-start.cpp @@ -187,7 +187,9 @@ validate(AcquireRuntime* runtime) ASSERT_EQ(int, "%d", 32, chunk_shape[3]); CHECK("C" == metadata["chunk_memory_layout"]); - CHECK("u1" == metadata["data_type"]); + + CHECK("uint8" == metadata["data_type"].get()); + CHECK(metadata["extensions"].empty()); const auto array_shape = metadata["shape"]; diff --git a/tests/write-zarr-v2-compressed-multiscale.cpp b/tests/write-zarr-v2-compressed-multiscale.cpp index 870ebd03..1e4ed823 100644 --- a/tests/write-zarr-v2-compressed-multiscale.cpp +++ b/tests/write-zarr-v2-compressed-multiscale.cpp @@ -202,6 +202,10 @@ verify_layer(const LayerTestCase& test_case) std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + const auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_layer, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-compressed-with-chunking-and-rollover.cpp b/tests/write-zarr-v2-compressed-with-chunking-and-rollover.cpp index ecbe9f71..411b1eb0 100644 --- a/tests/write-zarr-v2-compressed-with-chunking-and-rollover.cpp +++ b/tests/write-zarr-v2-compressed-with-chunking-and-rollover.cpp @@ -174,6 +174,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", chunk_planes + 1, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-compressed-with-chunking.cpp b/tests/write-zarr-v2-compressed-with-chunking.cpp index c393422f..8944de71 100644 --- a/tests/write-zarr-v2-compressed-with-chunking.cpp +++ b/tests/write-zarr-v2-compressed-with-chunking.cpp @@ -171,6 +171,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", chunk_planes, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-raw-chunk-size-larger-than-frame-size.cpp b/tests/write-zarr-v2-raw-chunk-size-larger-than-frame-size.cpp index 6f576b71..0e732a49 100644 --- a/tests/write-zarr-v2-raw-chunk-size-larger-than-frame-size.cpp +++ b/tests/write-zarr-v2-raw-chunk-size-larger-than-frame-size.cpp @@ -233,6 +233,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-raw-multiscale-with-trivial-tile-size.cpp b/tests/write-zarr-v2-raw-multiscale-with-trivial-tile-size.cpp index d638719d..6a1b6c1f 100644 --- a/tests/write-zarr-v2-raw-multiscale-with-trivial-tile-size.cpp +++ b/tests/write-zarr-v2-raw-multiscale-with-trivial-tile-size.cpp @@ -185,6 +185,10 @@ verify_layer(const LayerTestCase& test_case) std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + const auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_layer, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-raw-multiscale.cpp b/tests/write-zarr-v2-raw-multiscale.cpp index 1108ee88..88a8e5b8 100644 --- a/tests/write-zarr-v2-raw-multiscale.cpp +++ b/tests/write-zarr-v2-raw-multiscale.cpp @@ -192,6 +192,10 @@ verify_layer(const LayerTestCase& test_case) std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + const auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_layer, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-raw-with-even-chunking-and-rollover.cpp b/tests/write-zarr-v2-raw-with-even-chunking-and-rollover.cpp index 7f0e33d8..18379436 100644 --- a/tests/write-zarr-v2-raw-with-even-chunking-and-rollover.cpp +++ b/tests/write-zarr-v2-raw-with-even-chunking-and-rollover.cpp @@ -162,6 +162,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", chunk_planes + 1, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-raw-with-even-chunking.cpp b/tests/write-zarr-v2-raw-with-even-chunking.cpp index 8d9ddfb9..58fbdb9b 100644 --- a/tests/write-zarr-v2-raw-with-even-chunking.cpp +++ b/tests/write-zarr-v2-raw-with-even-chunking.cpp @@ -161,6 +161,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", chunk_planes, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-raw-with-ragged-chunking.cpp b/tests/write-zarr-v2-raw-with-ragged-chunking.cpp index c7232038..971eb66e 100644 --- a/tests/write-zarr-v2-raw-with-ragged-chunking.cpp +++ b/tests/write-zarr-v2-raw-with-ragged-chunking.cpp @@ -237,6 +237,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", max_frame_count, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-raw.cpp b/tests/write-zarr-v2-raw.cpp index 835f2590..d213ecad 100644 --- a/tests/write-zarr-v2-raw.cpp +++ b/tests/write-zarr-v2-raw.cpp @@ -3,6 +3,7 @@ #include "platform.h" // clock #include "logger.h" +#include #include #include #include @@ -233,6 +234,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-with-lz4-compression.cpp b/tests/write-zarr-v2-with-lz4-compression.cpp index d87526c8..686d0419 100644 --- a/tests/write-zarr-v2-with-lz4-compression.cpp +++ b/tests/write-zarr-v2-with-lz4-compression.cpp @@ -169,6 +169,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v2-with-zstd-compression.cpp b/tests/write-zarr-v2-with-zstd-compression.cpp index 1cd3a013..5430d558 100644 --- a/tests/write-zarr-v2-with-zstd-compression.cpp +++ b/tests/write-zarr-v2-with-zstd-compression.cpp @@ -169,6 +169,10 @@ validate() std::ifstream f(zarray_path); json zarray = json::parse(f); + const std::string dtype = + std::endian::native == std::endian::little ? "u1"; + CHECK(dtype == zarray["dtype"].get()); + auto shape = zarray["shape"]; ASSERT_EQ(int, "%d", frames_per_chunk, shape[0]); ASSERT_EQ(int, "%d", 1, shape[1]); diff --git a/tests/write-zarr-v3-compressed.cpp b/tests/write-zarr-v3-compressed.cpp index 391e639d..734e2521 100644 --- a/tests/write-zarr-v3-compressed.cpp +++ b/tests/write-zarr-v3-compressed.cpp @@ -270,7 +270,7 @@ validate() ASSERT_EQ(int, "%d", chunk_width, chunk_shape[3]); CHECK("C" == metadata["chunk_memory_layout"]); - CHECK("u1" == metadata["data_type"]); + CHECK("uint8" == metadata["data_type"]); CHECK(metadata["extensions"].empty()); const auto array_shape = metadata["shape"]; diff --git a/tests/write-zarr-v3-raw-chunk-exceeds-array.cpp b/tests/write-zarr-v3-raw-chunk-exceeds-array.cpp index bd9a223d..cfc3d7b0 100644 --- a/tests/write-zarr-v3-raw-chunk-exceeds-array.cpp +++ b/tests/write-zarr-v3-raw-chunk-exceeds-array.cpp @@ -261,7 +261,7 @@ validate() ASSERT_EQ(int, "%d", chunk_width, chunk_shape[2]); CHECK("C" == metadata["chunk_memory_layout"]); - CHECK("u1" == metadata["data_type"]); + CHECK("uint8" == metadata["data_type"]); CHECK(metadata["extensions"].empty()); const auto array_shape = metadata["shape"]; diff --git a/tests/write-zarr-v3-raw-with-ragged-sharding.cpp b/tests/write-zarr-v3-raw-with-ragged-sharding.cpp index 92632fa7..b3ccd51a 100644 --- a/tests/write-zarr-v3-raw-with-ragged-sharding.cpp +++ b/tests/write-zarr-v3-raw-with-ragged-sharding.cpp @@ -260,7 +260,7 @@ validate() ASSERT_EQ(int, "%d", chunk_width, chunk_shape[2]); CHECK("C" == metadata["chunk_memory_layout"]); - CHECK("u1" == metadata["data_type"]); + CHECK("uint8" == metadata["data_type"]); CHECK(metadata["extensions"].empty()); const auto array_shape = metadata["shape"]; diff --git a/tests/write-zarr-v3-raw.cpp b/tests/write-zarr-v3-raw.cpp index 5c4df05c..64d84197 100644 --- a/tests/write-zarr-v3-raw.cpp +++ b/tests/write-zarr-v3-raw.cpp @@ -270,7 +270,7 @@ validate() ASSERT_EQ(int, "%d", chunk_width, chunk_shape[3]); CHECK("C" == metadata["chunk_memory_layout"]); - CHECK("u1" == metadata["data_type"]); + CHECK("uint8" == metadata["data_type"]); CHECK(metadata["extensions"].empty()); const auto array_shape = metadata["shape"];