diff --git a/CHANGELOG.md b/CHANGELOG.md index a3c0155a..699ba19b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Calling `acquire_get_configuration` with a Zarr storage device now returns a URI of the storage device, with file:// scheme indicator and absolute path, assuming localhost. +### Added + +- Support for multiscale in Zarr V3 stores. + ## [0.1.11](https://github.com/acquire-project/acquire-driver-zarr/compare/v0.1.10..v0.1.11) - 2024-04-22 ### Fixed diff --git a/src/zarr.cpp b/src/zarr.cpp index fa191e04..2e6af956 100644 --- a/src/zarr.cpp +++ b/src/zarr.cpp @@ -353,9 +353,6 @@ zarr::Zarr::set(const StorageProperties* props) "Cannot set properties while running."); CHECK(props); - StoragePropertyMetadata meta{}; - get_meta(&meta); - // checks the directory exists and is writable validate_props(props); @@ -386,14 +383,7 @@ zarr::Zarr::set(const StorageProperties* props) pixel_scale_um_ = props->pixel_scale_um; set_dimensions_(props); - - if (props->enable_multiscale && !meta.multiscale_is_supported) { - // TODO (aliddell): https://github.com/ome/ngff/pull/206 - LOGE("OME-Zarr multiscale not yet supported in Zarr v3. " - "Multiscale arrays will not be written."); - } - enable_multiscale_ = meta.multiscale_is_supported && - props->enable_multiscale && + enable_multiscale_ = props->enable_multiscale && is_multiscale_supported(acquisition_dimensions_); } @@ -471,6 +461,7 @@ zarr::Zarr::get_meta(StoragePropertyMetadata* meta) const memset(meta, 0, sizeof(*meta)); meta->chunking_is_supported = 1; + meta->multiscale_is_supported = 1; meta->s3_is_supported = 1; } @@ -709,6 +700,106 @@ zarr::Zarr::write_mutable_metadata_() const } } +json +zarr::Zarr::make_multiscale_metadata_() const +{ + json multiscales = json::array({ json::object() }); + // write multiscale metadata + multiscales[0]["version"] = "0.4"; + + auto& axes = multiscales[0]["axes"]; + for (auto dim = acquisition_dimensions_.rbegin(); + dim != acquisition_dimensions_.rend(); + ++dim) { + std::string type; + switch (dim->kind) { + case DimensionType_Space: + type = "space"; + break; + case DimensionType_Channel: + type = "channel"; + break; + case DimensionType_Time: + type = "time"; + break; + case DimensionType_Other: + type = "other"; + break; + default: + throw std::runtime_error("Unknown dimension type"); + } + + if (dim < acquisition_dimensions_.rend() - 2) { + axes.push_back({ { "name", dim->name }, { "type", type } }); + } else { + axes.push_back({ { "name", dim->name }, + { "type", type }, + { "unit", "micrometer" } }); + } + } + + // spatial multiscale metadata + if (writers_.empty()) { + std::vector scales; + for (auto i = 0; i < acquisition_dimensions_.size() - 2; ++i) { + scales.push_back(1.); + } + scales.push_back(pixel_scale_um_.y); + scales.push_back(pixel_scale_um_.x); + + multiscales[0]["datasets"] = { + { + { "path", "0" }, + { "coordinateTransformations", + { + { + { "type", "scale" }, + { "scale", scales }, + }, + } + }, + }, + }; + } else { + for (auto i = 0; i < writers_.size(); ++i) { + std::vector scales; + scales.push_back(std::pow(2, i)); // append + for (auto k = 0; k < acquisition_dimensions_.size() - 3; ++k) { + scales.push_back(1.); + } + scales.push_back(std::pow(2, i) * pixel_scale_um_.y); // y + scales.push_back(std::pow(2, i) * pixel_scale_um_.x); // x + + multiscales[0]["datasets"].push_back({ + { "path", std::to_string(i) }, + { "coordinateTransformations", + { + { + { "type", "scale" }, + { "scale", scales }, + }, + } + }, + }); + } + + // downsampling metadata + multiscales[0]["type"] = "local_mean"; + multiscales[0]["metadata"] = { + { "description", + "The fields in the metadata describe how to reproduce this " + "multiscaling in scikit-image. The method and its parameters are " + "given here." }, + { "method", "skimage.transform.downscale_local_mean" }, + { "version", "0.21.0" }, + { "args", "[2]" }, + { "kwargs", { "cval", 0 } }, + }; + } + + return multiscales; +} + void zarr::Zarr::write_multiscale_frames_(const uint8_t* const data_, size_t bytes_of_data, diff --git a/src/zarr.hh b/src/zarr.hh index ce21aa9a..96727b15 100644 --- a/src/zarr.hh +++ b/src/zarr.hh @@ -16,10 +16,12 @@ #include // std::pair #include +#include + namespace fs = std::filesystem; +using json = nlohmann::json; namespace acquire::sink::zarr { - struct Zarr : public Storage { public: @@ -95,6 +97,7 @@ struct Zarr : public Storage virtual void write_array_metadata_(size_t level) const = 0; /// Multiscale + json make_multiscale_metadata_() const; void write_multiscale_frames_(const uint8_t* data_, size_t bytes_of_data, const ImageShape& shape_); diff --git a/src/zarr.v2.cpp b/src/zarr.v2.cpp index 96ec37d6..1ca0dc68 100644 --- a/src/zarr.v2.cpp +++ b/src/zarr.v2.cpp @@ -64,7 +64,6 @@ zarr::ZarrV2::get_meta(StoragePropertyMetadata* meta) const { Zarr::get_meta(meta); meta->sharding_is_supported = 0; - meta->multiscale_is_supported = 1; } void @@ -111,7 +110,6 @@ void zarr::ZarrV2::write_base_metadata_() const { namespace fs = std::filesystem; - using json = nlohmann::json; const json metadata = { { "zarr_format", 2 } }; const std::string metadata_str = metadata.dump(4); @@ -126,7 +124,6 @@ void zarr::ZarrV2::write_external_metadata_() const { namespace fs = std::filesystem; - using json = nlohmann::json; std::string metadata_str = external_metadata_json_.empty() ? "{}" @@ -147,99 +144,9 @@ void zarr::ZarrV2::write_group_metadata_() const { namespace fs = std::filesystem; - using json = nlohmann::json; json metadata; - metadata["multiscales"] = json::array({ json::object() }); - metadata["multiscales"][0]["version"] = "0.4"; - - auto& axes = metadata["multiscales"][0]["axes"]; - for (auto dim = acquisition_dimensions_.rbegin(); - dim != acquisition_dimensions_.rend(); - ++dim) { - std::string type; - switch (dim->kind) { - case DimensionType_Space: - type = "space"; - break; - case DimensionType_Channel: - type = "channel"; - break; - case DimensionType_Time: - type = "time"; - break; - case DimensionType_Other: - type = "other"; - break; - default: - throw std::runtime_error("Unknown dimension type"); - } - - if (dim < acquisition_dimensions_.rend() - 2) { - axes.push_back({ { "name", dim->name }, { "type", type } }); - } else { - axes.push_back({ { "name", dim->name }, - { "type", type }, - { "unit", "micrometer" } }); - } - } - - // spatial multiscale metadata - if (writers_.empty()) { - std::vector scales; - for (auto i = 0; i < acquisition_dimensions_.size() - 2; ++i) { - scales.push_back(1.); - } - scales.push_back(pixel_scale_um_.y); - scales.push_back(pixel_scale_um_.x); - - metadata["multiscales"][0]["datasets"] = { - { - { "path", "0" }, - { "coordinateTransformations", - { - { - { "type", "scale" }, - { "scale", scales }, - }, - } }, - }, - }; - } else { - for (auto i = 0; i < writers_.size(); ++i) { - std::vector scales; - scales.push_back(std::pow(2, i)); // append - for (auto k = 0; k < acquisition_dimensions_.size() - 3; ++k) { - scales.push_back(1.); - } - scales.push_back(std::pow(2, i) * pixel_scale_um_.y); // y - scales.push_back(std::pow(2, i) * pixel_scale_um_.x); // x - - metadata["multiscales"][0]["datasets"].push_back({ - { "path", std::to_string(i) }, - { "coordinateTransformations", - { - { - { "type", "scale" }, - { "scale", scales }, - }, - } }, - }); - } - - // downsampling metadata - metadata["multiscales"][0]["type"] = "local_mean"; - metadata["multiscales"][0]["metadata"] = { - { "description", - "The fields in the metadata describe how to reproduce this " - "multiscaling in scikit-image. The method and its parameters are " - "given here." }, - { "method", "skimage.transform.downscale_local_mean" }, - { "version", "0.21.0" }, - { "args", "[2]" }, - { "kwargs", { "cval", 0 } }, - }; - } + metadata["multiscales"] = make_multiscale_metadata_(); const std::string metadata_str = metadata.dump(4); const auto* metadata_bytes = (const uint8_t*)metadata_str.c_str(); @@ -253,7 +160,6 @@ void zarr::ZarrV2::write_array_metadata_(size_t level) const { namespace fs = std::filesystem; - using json = nlohmann::json; CHECK(level < writers_.size()); const auto& writer = writers_.at(level); diff --git a/src/zarr.v3.cpp b/src/zarr.v3.cpp index db46f7bf..90d1c1e6 100644 --- a/src/zarr.v3.cpp +++ b/src/zarr.v3.cpp @@ -92,7 +92,6 @@ zarr::ZarrV3::get_meta(StoragePropertyMetadata* meta) const { Zarr::get_meta(meta); meta->sharding_is_supported = 1; - meta->multiscale_is_supported = 0; } void @@ -108,7 +107,6 @@ void zarr::ZarrV3::write_base_metadata_() const { namespace fs = std::filesystem; - using json = nlohmann::json; json metadata; metadata["extensions"] = json::array(); @@ -141,7 +139,6 @@ void zarr::ZarrV3::write_group_metadata_() const { namespace fs = std::filesystem; - using json = nlohmann::json; json metadata; metadata["attributes"]["acquire"] = @@ -151,6 +148,7 @@ zarr::ZarrV3::write_group_metadata_() const true, // allow exceptions true // ignore comments ); + metadata["attributes"]["multiscales"] = make_multiscale_metadata_(); const std::string metadata_str = metadata.dump(4); const auto* metadata_bytes = (const uint8_t*)metadata_str.c_str(); @@ -163,7 +161,6 @@ void zarr::ZarrV3::write_array_metadata_(size_t level) const { namespace fs = std::filesystem; - using json = nlohmann::json; CHECK(level < writers_.size()); const auto& writer = writers_.at(level); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d73fb521..bf02fa17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -40,6 +40,7 @@ else () write-zarr-v3-raw-with-ragged-sharding write-zarr-v3-raw-chunk-exceeds-array write-zarr-v3-compressed + write-zarr-v3-raw-multiscale write-zarr-v3-to-s3 ) diff --git a/tests/get-meta.cpp b/tests/get-meta.cpp index 161540bb..61c87481 100644 --- a/tests/get-meta.cpp +++ b/tests/get-meta.cpp @@ -86,11 +86,10 @@ main() CHECK(Device_Ok == storage_get_meta(storage, &metadata)); CHECK(metadata.chunking_is_supported); + CHECK(metadata.multiscale_is_supported); CHECK(metadata.s3_is_supported); CHECK((bool)metadata.sharding_is_supported == name.starts_with("ZarrV3")); - CHECK((bool)metadata.multiscale_is_supported != - name.starts_with("ZarrV3")); CHECK(Device_Ok == driver_close_device(device)); } diff --git a/tests/get.cpp b/tests/get.cpp index d8325e29..8b81cba2 100644 --- a/tests/get.cpp +++ b/tests/get.cpp @@ -169,7 +169,7 @@ main() CHECK(props.first_frame_id == 0); // this is ignored - CHECK(props.enable_multiscale == !name.starts_with("ZarrV3")); + CHECK(props.enable_multiscale); storage_properties_destroy(&props); diff --git a/tests/write-zarr-v3-raw-multiscale.cpp b/tests/write-zarr-v3-raw-multiscale.cpp new file mode 100644 index 00000000..b5057753 --- /dev/null +++ b/tests/write-zarr-v3-raw-multiscale.cpp @@ -0,0 +1,357 @@ +/// @brief Test that an acquisition to Zarr V3 with multiscale enabled writes +/// multiple layers to the Zarr group, that the layers are the correct size, +/// that they are chunked accordingly, and that the metadata is written +/// correctly. + +#include +#include +#include + +#include "device/hal/device.manager.h" +#include "acquire.h" +#include "logger.h" + +#include "nlohmann/json.hpp" + +namespace fs = std::filesystem; +using json = nlohmann::json; + +void +reporter(int is_error, + const char* file, + int line, + const char* function, + const char* msg) +{ + fprintf(is_error ? stderr : stdout, + "%s%s(%d) - %s: %s\n", + is_error ? "ERROR " : "", + file, + line, + function, + msg); +} + +/// Helper for passing size static strings as function args. +/// For a function: `f(char*,size_t)` use `f(SIZED("hello"))`. +/// Expands to `f("hello",5)`. +#define SIZED(str) str, sizeof(str) - 1 + +#define L (aq_logger) +#define LOG(...) L(0, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__) +#define ERR(...) L(1, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__) +#define EXPECT(e, ...) \ + do { \ + if (!(e)) { \ + char buf[1 << 8] = { 0 }; \ + ERR(__VA_ARGS__); \ + snprintf(buf, sizeof(buf) - 1, __VA_ARGS__); \ + throw std::runtime_error(buf); \ + } \ + } while (0) +#define CHECK(e) EXPECT(e, "Expression evaluated as false: %s", #e) +#define DEVOK(e) CHECK(Device_Ok == (e)) +#define OK(e) CHECK(AcquireStatus_Ok == (e)) + +/// Check that a==b +/// example: `ASSERT_EQ(int,"%d",42,meaning_of_life())` +#define ASSERT_EQ(T, fmt, a, b) \ + do { \ + T a_ = (T)(a); \ + T b_ = (T)(b); \ + EXPECT(a_ == b_, "Expected %s==%s but " fmt "!=" fmt, #a, #b, a_, b_); \ + } while (0) + +/// Check that strings a == b +/// example: `ASSERT_STREQ("foo",container_of_foo)` +#define ASSERT_STREQ(a, b) \ + do { \ + std::string a_ = (a); \ + std::string b_ = (b); \ + EXPECT(a_ == b_, \ + "Expected '%s'=='%s' but '%s'!= '%s'", \ + #a, \ + #b, \ + a_.c_str(), \ + b_.c_str()); \ + } while (0) + +const static uint32_t frame_width = 240; +const static uint32_t frame_height = 135; + +const static uint32_t chunk_width = frame_width / 3; +const static uint32_t chunk_height = frame_height / 3; +const static uint32_t chunk_planes = 128; + +const static uint64_t max_frames = 100; + +void +configure(AcquireRuntime* runtime) +{ + CHECK(runtime); + + const DeviceManager* dm = acquire_device_manager(runtime); + CHECK(dm); + + AcquireProperties props = {}; + OK(acquire_get_configuration(runtime, &props)); + + // configure camera + DEVOK(device_manager_select(dm, + DeviceKind_Camera, + SIZED("simulated.*empty.*"), + &props.video[0].camera.identifier)); + + props.video[0].camera.settings.binning = 1; + props.video[0].camera.settings.pixel_type = SampleType_u8; + props.video[0].camera.settings.shape = { .x = frame_width, + .y = frame_height }; + + // configure storage + DEVOK(device_manager_select(dm, + DeviceKind_Storage, + SIZED("ZarrV3"), + &props.video[0].storage.identifier)); + + const char external_metadata[] = R"({"hello":"world"})"; + const struct PixelScale sample_spacing_um = { 1, 1 }; + + std::string filename = TEST ".zarr"; + CHECK(storage_properties_init(&props.video[0].storage.settings, + 0, + filename.c_str(), + filename.size() + 1, + (char*)external_metadata, + sizeof(external_metadata), + sample_spacing_um, + 4)); + + CHECK(storage_properties_set_enable_multiscale( + &props.video[0].storage.settings, 1)); + + // configure storage dimensions + CHECK(storage_properties_set_dimension(&props.video[0].storage.settings, + 0, + SIZED("x") + 1, + DimensionType_Space, + frame_width, + chunk_width, + 1)); + CHECK(storage_properties_set_dimension(&props.video[0].storage.settings, + 1, + SIZED("y") + 1, + DimensionType_Space, + frame_height, + chunk_height, + 1)); + CHECK(storage_properties_set_dimension(&props.video[0].storage.settings, + 2, + SIZED("c") + 1, + DimensionType_Channel, + 1, + 1, + 1)); + CHECK(storage_properties_set_dimension(&props.video[0].storage.settings, + 3, + SIZED("t") + 1, + DimensionType_Time, + 0, + chunk_planes, + 1)); + + // configure acquisition + props.video[0].max_frame_count = max_frames; + + OK(acquire_configure(runtime, &props)); + + storage_properties_destroy(&props.video[0].storage.settings); +} + +void +acquire(AcquireRuntime* runtime) +{ + OK(acquire_start(runtime)); + OK(acquire_stop(runtime)); +} + +struct LayerTestCase +{ + int layer; + int frame_width; + int frame_height; + int tile_width; + int tile_height; + int frames_per_layer; + int frames_per_chunk; +}; + +void +verify_layer(const LayerTestCase& test_case) +{ + const auto layer = test_case.layer; + const auto layer_tile_width = test_case.tile_width; + const auto layer_frame_width = test_case.frame_width; + const auto layer_tile_height = test_case.tile_height; + const auto layer_frame_height = test_case.frame_height; + const auto frames_per_layer = test_case.frames_per_layer; + const auto frames_per_chunk = test_case.frames_per_chunk; + + const auto array_meta_path = fs::path(TEST ".zarr") / "meta" / "root" / + (std::to_string(layer) + ".array.json"); + CHECK(fs::is_regular_file(array_meta_path)); + CHECK(fs::file_size(array_meta_path) > 0); + + // check metadata + std::ifstream f(array_meta_path); + json array_meta = json::parse(f); + + const auto shape = array_meta["shape"]; + ASSERT_EQ(int, "%d", frames_per_layer, shape[0]); + ASSERT_EQ(int, "%d", 1, shape[1]); + ASSERT_EQ(int, "%d", layer_frame_height, shape[2]); + ASSERT_EQ(int, "%d", layer_frame_width, shape[3]); + + const auto chunk_shape = array_meta["chunk_grid"]["chunk_shape"]; + ASSERT_EQ(int, "%d", frames_per_chunk, chunk_shape[0].get()); + ASSERT_EQ(int, "%d", 1, chunk_shape[1].get()); + ASSERT_EQ(int, "%d", layer_tile_height, chunk_shape[2].get()); + ASSERT_EQ(int, "%d", layer_tile_width, chunk_shape[3].get()); + + // check chunked data + size_t chunk_size = chunk_shape[0].get() * chunk_shape[1].get() * + chunk_shape[2].get() * chunk_shape[3].get(); + size_t shard_file_size = + chunk_size + // 1 chunk per shard + 2 * + sizeof(uint64_t); // 2 uint64_t for the chunk size and the chunk offset + + const auto shards_in_x = + (layer_frame_width + layer_tile_width - 1) / layer_tile_width; + + const auto shards_in_y = + (layer_frame_height + layer_tile_height - 1) / layer_tile_height; + + const fs::path data_root = fs::path(TEST ".zarr") / "data" / "root"; + CHECK(fs::is_directory(data_root)); + + const fs::path layer_root = data_root / std::to_string(layer); + CHECK(fs::is_directory(layer_root)); + + const fs::path t_path = layer_root / "c0"; + CHECK(fs::is_directory(t_path)); + + const fs::path c_path = t_path / "0"; + CHECK(fs::is_directory(c_path)); + + for (auto i = 0; i < shards_in_y; ++i) { + const fs::path y_path = c_path / std::to_string(i); + CHECK(fs::is_directory(y_path)); + + for (auto j = 0; j < shards_in_x; ++j) { + const auto shard_file_path = y_path / std::to_string(j); + CHECK(fs::is_regular_file(shard_file_path)); + + const auto file_size = fs::file_size(shard_file_path); + ASSERT_EQ(int, "%d", shard_file_size, file_size); + } + } + + // check there's not a second shard in t + auto missing_path = layer_root / "c1"; + CHECK(!fs::is_regular_file(missing_path)); + + // check there's not a second shard in c + missing_path = t_path / "1"; + CHECK(!fs::is_regular_file(missing_path)); + + // check there's no add'l shards in y + missing_path = c_path / std::to_string(shards_in_y); + CHECK(!fs::is_regular_file(missing_path)); + + // check there's no add'l shards in x + missing_path = c_path / "0" / std::to_string(shards_in_x); + CHECK(!fs::is_regular_file(missing_path)); +} + +void +validate() +{ + CHECK(fs::is_directory(TEST ".zarr")); + + const auto group_meta_path = + fs::path(TEST ".zarr") / "meta" / "root.group.json"; + CHECK(fs::is_regular_file(group_meta_path)); + CHECK(fs::file_size(group_meta_path) > 0); + + // check metadata + std::ifstream f(group_meta_path); + json group_meta = json::parse(f); + + const auto multiscales = group_meta["attributes"]["multiscales"][0]; + + const auto& axes = multiscales["axes"]; + ASSERT_EQ(int, "%d", 4, axes.size()); + + ASSERT_STREQ("t", axes[0]["name"]); + ASSERT_STREQ("time", axes[0]["type"]); + + ASSERT_STREQ("c", axes[1]["name"]); + ASSERT_STREQ("channel", axes[1]["type"]); + + ASSERT_STREQ("y", axes[2]["name"]); + ASSERT_STREQ("space", axes[2]["type"]); + ASSERT_STREQ("micrometer", axes[2]["unit"]); + + ASSERT_STREQ("x", axes[3]["name"]); + ASSERT_STREQ("space", axes[3]["type"]); + ASSERT_STREQ("micrometer", axes[3]["unit"]); + + const auto& datasets = multiscales["datasets"]; + ASSERT_EQ(int, "%d", 3, datasets.size()); + for (auto i = 0; i < 3; ++i) { + const auto& dataset = datasets.at(i); + ASSERT_STREQ(std::to_string(i), dataset["path"]); + + const auto& coord_trans = dataset["coordinateTransformations"][0]; + ASSERT_STREQ("scale", coord_trans["type"]); + + const auto& scale = coord_trans["scale"]; + ASSERT_EQ(float, "%f", std::pow(2.f, i), scale[0].get()); + ASSERT_EQ(float, "%f", 1.f, scale[1].get()); + ASSERT_EQ(float, "%f", std::pow(2.f, i), scale[2].get()); + ASSERT_EQ(float, "%f", std::pow(2.f, i), scale[3].get()); + } + + ASSERT_STREQ(multiscales["type"], "local_mean"); + + // verify each layer + verify_layer({ 0, 240, 135, 80, 45, 100, 128 }); + verify_layer({ 1, 120, 68, 80, 45, 50, 128 }); // padding here + verify_layer({ 2, 60, 34, 60, 34, 25, 128 }); + + auto missing_path = fs::path(TEST ".zarr/3"); + CHECK(!fs::exists(missing_path)); +} + +int +main() +{ + int retval = 1; + auto runtime = acquire_init(reporter); + + try { + configure(runtime); + acquire(runtime); + validate(); + + retval = 0; + LOG("Done (OK)"); + } catch (const std::exception& exc) { + ERR("Exception: %s", exc.what()); + } catch (...) { + ERR("Unknown exception"); + } + + acquire_shutdown(runtime); + return retval; +}