diff --git a/CHANGELOG.md b/CHANGELOG.md index 87f7c4eb..c67fa205 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Support for [Zarr v3](https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html). +- Support for + the [sharding storage transformer](https://web.archive.org/web/20230213221154/https://zarr-specs.readthedocs.io/en/latest/extensions/storage-transformers/sharding/v1.0.html) + in Zarr v3. +- Ship debug libs for C-Blosc on Linux and Mac. + +### Changed + +- Upgrades C-Blosc from v1.21.4 to v1.21.5. + ### Fixed - A bug where enabling multiscale without specifying the tile size would cause an error. +- Exceptions thrown off the main thread are now caught and logged, and Zarr throws an error in `append`. +- Job queue is now cleared after every operation. ## [0.1.4](https://github.com/acquire-project/acquire-driver-zarr/compare/v0.1.3...v0.1.4) - 2023-08-11 diff --git a/README.md b/README.md index 8fc700b6..45404a43 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,9 @@ This is an Acquire Driver that supports chunked streaming to [zarr][]. - **Zarr** - **ZarrBlosc1ZstdByteShuffle** - **ZarrBlosc1Lz4ByteShuffle** +- **ZarrV3** +- **ZarrV3Blosc1ZstdByteShuffle** +- **ZarrV3Blosc1Lz4ByteShuffle** ## Using the Zarr storage device @@ -24,6 +27,10 @@ Chunking is configured using `storage_properties_set_chunking_props()` when conf Multiscale storage can be enabled or disabled by calling `storage_properties_set_enable_multiscale()` when configuring the video stream. +For the [Zarr v3] version of each device, you can use the `ZarrV3*` devices. +**Note:** Zarr v3 is not [yet](https://github.com/ome/ngff/pull/206) supported by the Python OME-Zarr library, so you +will not be able to read multiscale metadata from the resulting dataset. + ### Configuring chunking You can configure chunking by calling `storage_properties_set_chunking_props()` on your `StorageProperties` object @@ -41,21 +48,21 @@ storage_properties_set_chunking_props(struct StorageProperties* out, ``` | ![frames](https://github.com/aliddell/acquire-driver-zarr/assets/844464/3510d468-4751-4fa0-b2bf-0e29a5f3ea1c) | -|:--:| -| A collection of frames. | +|:-------------------------------------------------------------------------------------------------------------:| +| A collection of frames. | A _tile_ is a contiguous section, or region of interest, of a _frame_. | ![tiles](https://github.com/aliddell/acquire-driver-zarr/assets/844464/f8d16139-e0ac-44db-855f-2f5ef305c98b) | -|:--:| -| A collection of frames, divided into tiles. | +|:------------------------------------------------------------------------------------------------------------:| +| A collection of frames, divided into tiles. | A _chunk_ is nothing more than some number of stacked tiles from subsequent frames, with each tile in a chunk having the same ROI in its respective frame. -| ![chunks](https://github.com/aliddell/acquire-driver-zarr/assets/844464/653e4d82-363e-4e04-9a42-927b052fb6e7) | -|:--:| -| A collection of frames, divided into tiles. A single chunk has been highlighted in red. | +| ![chunks](https://github.com/aliddell/acquire-driver-zarr/assets/844464/653e4d82-363e-4e04-9a42-927b052fb6e7) | +|:-------------------------------------------------------------------------------------------------------------:| +| A collection of frames, divided into tiles. A single chunk has been highlighted in red. 
| You can specify the width and height, in pixels, of each tile, and if your frame size has more than one plane, you can specify the number of planes you want per tile as well. @@ -120,3 +127,5 @@ Then the sequence of levels will have dimensions 1920 x 1080, 960 x 540, 480 x 2 [Blosc]: https://github.com/Blosc/c-blosc [Blosc docs]: https://www.blosc.org/ + +[Zarr v3]: https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html \ No newline at end of file diff --git a/src/3rdParty/cblosc/CMakeLists.txt b/src/3rdParty/cblosc/CMakeLists.txt index 7b04d71d..96385ce3 100644 --- a/src/3rdParty/cblosc/CMakeLists.txt +++ b/src/3rdParty/cblosc/CMakeLists.txt @@ -7,10 +7,10 @@ if(WIN32) set(libd "${pwd}/lib/win64/libblosc-debug.lib") elseif(APPLE) set(lib "${pwd}/lib/osx/libblosc.a") - set(libd ${lib}) + set(libd "${pwd}/lib/osx/libblosc-debug.a") elseif(LINUX) set(lib "${pwd}/lib/linux-amd64/libblosc.a") - set(libd ${lib}) + set(libd "${pwd}/lib/linux-amd64/libblosc-debug.a") endif() message(STATUS "C-Blosc: ${pwd}") diff --git a/src/3rdParty/cblosc/lib/linux-amd64/libblosc-debug.a b/src/3rdParty/cblosc/lib/linux-amd64/libblosc-debug.a new file mode 100644 index 00000000..765b4791 Binary files /dev/null and b/src/3rdParty/cblosc/lib/linux-amd64/libblosc-debug.a differ diff --git a/src/3rdParty/cblosc/lib/linux-amd64/libblosc.a b/src/3rdParty/cblosc/lib/linux-amd64/libblosc.a index 8c3e7bdc..464a725b 100644 Binary files a/src/3rdParty/cblosc/lib/linux-amd64/libblosc.a and b/src/3rdParty/cblosc/lib/linux-amd64/libblosc.a differ diff --git a/src/3rdParty/cblosc/lib/osx/README.md b/src/3rdParty/cblosc/lib/osx/README.md index 9e0d30c5..766bb60a 100644 --- a/src/3rdParty/cblosc/lib/osx/README.md +++ b/src/3rdParty/cblosc/lib/osx/README.md @@ -1,6 +1,6 @@ The library here was built as follows. -Against v1.21.4 (2c2f9bd) of the c-blosc source hosted at: +Against v1.21.5 (d306135) of the c-blosc source hosted at: https://github.com/Blosc/c-blosc It's a universal binary compiled for `arm64` and `x86_64`. 
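
> **Editor's note — usage sketch for the chunking section above.** The README gives the declaration of `storage_properties_set_chunking_props()` but no call site. Below is a minimal sketch configuring 640 x 480 tiles, one plane per tile, and a 64 MiB per-chunk cap. The parameter names and order are assumed from the README's description (tile width, tile height, planes per tile, then the maximum bytes per chunk) and the header path is assumed; consult the `StorageProperties` header in acquire-core-libs for the authoritative signature.

```cpp
// Sketch only: parameter order and the include path are assumptions drawn
// from the README text, not copied from the actual header.
// #include "device/props/storage.h"   // header path assumed

void
configure_chunking_example(struct StorageProperties* props)
{
    // 640 x 480 pixel tiles, one plane per tile, chunks capped at 64 MiB.
    storage_properties_set_chunking_props(props,
                                          640,           // tile width (px)
                                          480,           // tile height (px)
                                          1,             // planes per tile
                                          64ULL << 20);  // max bytes per chunk
}
```

Multiscale can then be toggled independently with `storage_properties_set_enable_multiscale()`, as described in the README section above.
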
diff --git a/src/3rdParty/cblosc/lib/osx/libblosc-debug.a b/src/3rdParty/cblosc/lib/osx/libblosc-debug.a new file mode 100644 index 00000000..5a5f5408 Binary files /dev/null and b/src/3rdParty/cblosc/lib/osx/libblosc-debug.a differ diff --git a/src/3rdParty/cblosc/lib/osx/libblosc.a b/src/3rdParty/cblosc/lib/osx/libblosc.a index 968cffc0..72de14a1 100644 Binary files a/src/3rdParty/cblosc/lib/osx/libblosc.a and b/src/3rdParty/cblosc/lib/osx/libblosc.a differ diff --git a/src/3rdParty/cblosc/lib/win64/libblosc-debug.lib b/src/3rdParty/cblosc/lib/win64/libblosc-debug.lib index 666f6b50..3d262136 100644 Binary files a/src/3rdParty/cblosc/lib/win64/libblosc-debug.lib and b/src/3rdParty/cblosc/lib/win64/libblosc-debug.lib differ diff --git a/src/3rdParty/cblosc/lib/win64/libblosc.lib b/src/3rdParty/cblosc/lib/win64/libblosc.lib index a9b9b235..c94752b3 100644 Binary files a/src/3rdParty/cblosc/lib/win64/libblosc.lib and b/src/3rdParty/cblosc/lib/win64/libblosc.lib differ diff --git a/src/3rdParty/cblosc/lib/win64/libblosc.pdb b/src/3rdParty/cblosc/lib/win64/libblosc.pdb index 25f4dc11..6500e236 100644 Binary files a/src/3rdParty/cblosc/lib/win64/libblosc.pdb and b/src/3rdParty/cblosc/lib/win64/libblosc.pdb differ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 50d59048..a43c52fd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,26 +1,28 @@ add_subdirectory(3rdParty) -if(NOT TARGET acquire-core-logger) - add_subdirectory(acquire-core-libs) -endif() +if (NOT TARGET acquire-core-logger) + add_subdirectory(acquire-core-libs) +endif () set(tgt acquire-driver-zarr) add_library(${tgt} MODULE prelude.h - tiled.frame.hh - tiled.frame.cpp - chunk.writer.hh - chunk.writer.cpp - frame.scaler.hh - frame.scaler.cpp + common.hh + common.cpp + writers/writer.hh + writers/writer.cpp + writers/chunk.writer.hh + writers/chunk.writer.cpp + writers/shard.writer.hh + writers/shard.writer.cpp + writers/blosc.compressor.hh + writers/blosc.compressor.cpp zarr.hh zarr.cpp - zarr.encoder.hh - zarr.encoder.cpp - zarr.raw.hh - zarr.raw.cpp - zarr.blosc.hh - zarr.blosc.cpp + zarr.v2.hh + zarr.v2.cpp + zarr.v3.hh + zarr.v3.cpp zarr.driver.c ) target_enable_simd(${tgt}) diff --git a/src/chunk.writer.cpp b/src/chunk.writer.cpp deleted file mode 100644 index 6ad1e28f..00000000 --- a/src/chunk.writer.cpp +++ /dev/null @@ -1,264 +0,0 @@ -#include "chunk.writer.hh" - -#include -#include -#include -#include - -#include "device/props/components.h" -#include "platform.h" - -/// Check that a==b -/// example: `ASSERT_EQ(int,"%d",42,meaning_of_life())` -#define ASSERT_EQ(T, fmt, a, b) \ - do { \ - T a_ = (T)(a); \ - T b_ = (T)(b); \ - EXPECT(a_ == b_, "Expected %s==%s but " fmt "!=" fmt, #a, #b, a_, b_); \ - } while (0) - -namespace fs = std::filesystem; -namespace zarr = acquire::sink::zarr; - -namespace { -size_t -bytes_of_type(const SampleType& type) noexcept -{ - if (type >= SampleTypeCount) - return 0; - - static size_t table[SampleTypeCount]; // = { 1, 2, 1, 2, 4, 2, 2, 2 }; -#define XXX(s, b) table[(s)] = (b) - XXX(SampleType_u8, 1); - XXX(SampleType_u16, 2); - XXX(SampleType_i8, 1); - XXX(SampleType_i16, 2); - XXX(SampleType_f32, 4); - XXX(SampleType_u10, 2); - XXX(SampleType_u12, 2); - XXX(SampleType_u14, 2); -#undef XXX - return table[type]; -} - -size_t -bytes_per_tile(const ImageShape& image, const zarr::TileShape& tile) -{ - return bytes_of_type(image.type) * image.dims.channels * tile.width * - tile.height * tile.planes; -} -} // ::{anonymous} - -namespace acquire::sink::zarr { 
-CompressionParams::CompressionParams() - : clevel_{ 1 } - , shuffle_{ 1 } -{ -} - -CompressionParams::CompressionParams(const std::string& codec_id, - int clevel, - int shuffle) - : codec_id_{ codec_id } - , clevel_{ clevel } - , shuffle_{ shuffle } -{ -} - -ChunkWriter::ChunkWriter(BaseEncoder* encoder, - const ImageShape& image_shape, - const TileShape& tile_shape, - uint32_t lod, - uint32_t tile_col, - uint32_t tile_row, - uint32_t tile_plane, - uint64_t max_bytes_per_chunk, - char dimension_separator, - const std::string& base_directory) - : encoder_{ encoder } - , bytes_per_chunk_{ 0 } - , tiles_per_chunk_{ 0 } - , bytes_written_{ 0 } - , current_chunk_{ 0 } - , dimension_separator_{ dimension_separator } - , base_dir_{ base_directory } - , current_file_{} - , layer_{ lod } - , tile_col_{ tile_col } - , tile_row_{ tile_row } - , tile_plane_{ tile_plane } - , image_shape_{ image_shape } - , tile_shape_{ tile_shape } -{ - CHECK(encoder_); - const auto bpt = (float)::bytes_per_tile(image_shape_, tile_shape_); - EXPECT(bpt > 0, "Computed zero bytes per tile.", bpt); - - tiles_per_chunk_ = std::floor((float)max_bytes_per_chunk / bpt); - EXPECT(tiles_per_chunk_ > 0, - "Given %lu bytes per chunk, %lu bytes per tile.", - max_bytes_per_chunk, - ::bytes_of_type(image_shape.type)); - - // this is guaranteed to be positive - bytes_per_chunk_ = tiles_per_chunk_ * (size_t)bpt; - - EXPECT('.' == dimension_separator || '/' == dimension_separator, - "Expecting either '.' or '/' for dimension separator, got '%c'.", - dimension_separator); -} - -ChunkWriter::~ChunkWriter() -{ - close_current_file(); - delete encoder_; -} - -bool -ChunkWriter::write_frame(const TiledFrame& frame) -{ - std::scoped_lock lock(mutex_); - const size_t bpt = ::bytes_per_tile(image_shape_, tile_shape_); - if (buffer_.size() < bpt) - buffer_.resize(bpt); - - uint8_t* data = buffer_.data(); - size_t nbytes = - frame.copy_tile(data, bpt, tile_col_, tile_row_, tile_plane_); - - nbytes = write(data, data + nbytes); - - return nbytes == bpt; -} - -const ImageShape& -ChunkWriter::image_shape() const noexcept -{ - return image_shape_; -} - -const TileShape& -ChunkWriter::tile_shape() const noexcept -{ - return tile_shape_; -} - -uint32_t -ChunkWriter::frames_written() const -{ - const uint64_t bpt = bytes_per_tile(image_shape_, tile_shape_); - CHECK(bpt > 0); - return (uint32_t)(bytes_written_ / bpt); -} - -size_t -ChunkWriter::write(const uint8_t* beg, const uint8_t* end) -{ - const size_t bytes_in = (uint8_t*)end - (uint8_t*)beg; - if (0 == bytes_in) - return 0; - - if (!current_file_.has_value()) - open_chunk_file(); - - size_t bytes_out = 0; - auto* cur = (uint8_t*)beg; - - // we should never see this, but if the number of bytes brings us past - // the chunk boundary, we need to rollover - CHECK(bytes_per_chunk_ > 0); - const size_t bytes_of_this_chunk = bytes_written_ % bytes_per_chunk_; - if (bytes_in + bytes_of_this_chunk > bytes_per_chunk_) { - const size_t bytes_remaining = bytes_per_chunk_ - bytes_of_this_chunk; - - bytes_out = encoder_->write(beg, beg + bytes_remaining); - bytes_written_ += bytes_out; - if (bytes_out && bytes_written_ % bytes_per_chunk_ == 0) - rollover(); - - cur += bytes_out; - } - - if (auto b = encoder_->write(cur, end); b > 0) { - bytes_written_ += b; - bytes_out += b; - - if (bytes_written_ % bytes_per_chunk_ == 0) - rollover(); - } - - return bytes_out; -} - -void -ChunkWriter::open_chunk_file() -{ - char file_path[512]; - snprintf(file_path, - sizeof(file_path) - 1, - "%d%c%d%c%d%c%d%c%d", - layer_, 
- dimension_separator_, - current_chunk_, - dimension_separator_, - tile_plane_, - dimension_separator_, - tile_row_, - dimension_separator_, - tile_col_); - - std::string path = (fs::path(base_dir_) / file_path).string(); - auto parent_path = fs::path(path).parent_path(); - - if (!fs::is_directory(parent_path)) - fs::create_directories(parent_path); - - current_file_ = file{}; - CHECK(file_create(¤t_file_.value(), path.c_str(), path.size())); - - encoder_->set_file(¤t_file_.value()); -} - -void -ChunkWriter::close_current_file() -{ - if (!current_file_.has_value()) - return; - - const size_t bpt = bytes_per_tile(image_shape_, tile_shape_); - CHECK(bpt > 0); - const size_t tiles_written = bytes_written_ / bpt; - - if (tiles_written > tiles_per_chunk_ && - tiles_written % tiles_per_chunk_ > 0) - finalize_chunk(); - - encoder_->flush(); - - file_close(¤t_file_.value()); - current_file_.reset(); - - encoder_->set_file(nullptr); -} - -void -ChunkWriter::finalize_chunk() -{ - CHECK(bytes_per_chunk_ > 0); - size_t bytes_remaining = - bytes_per_chunk_ - (bytes_written_ % bytes_per_chunk_); - std::vector zeros(bytes_remaining); - std::fill(zeros.begin(), zeros.end(), 0); - - bytes_written_ += - encoder_->write(zeros.data(), zeros.data() + bytes_remaining); -} - -void -ChunkWriter::rollover() -{ - TRACE("Rolling over"); - close_current_file(); - ++current_chunk_; -} -} // namespace acquire::sink::zarr \ No newline at end of file diff --git a/src/chunk.writer.hh b/src/chunk.writer.hh deleted file mode 100644 index 22a6956f..00000000 --- a/src/chunk.writer.hh +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef H_ACQUIRE_ZARR_CHUNK_WRITER_V0 -#define H_ACQUIRE_ZARR_CHUNK_WRITER_V0 - -#include -#include - -#include "platform.h" - -#include "zarr.encoder.hh" -#include "zarr.blosc.hh" -#include "tiled.frame.hh" - -namespace acquire::sink::zarr { - -struct ChunkWriter final -{ - public: - ChunkWriter() = delete; - - /// @param encoder Encoder to use for encoding data as it comes in. - /// @param image_shape Shape and strides of the frame. - /// @param tile_shape Dimensions of the tile. - /// @param lod Multiscale level of detail. Full resolution is 0. - /// @param tile_col Column index, in tile space, of this tile. - /// @param tile_row Row index, in tile space, of this tile. - /// @param tile_plane Plane index, in tile space, of this tile. - /// @param max_bytes_per_chunk Maximum bytes per chunk. - /// @param dimension_separator Separator to use between dimension names. - /// @param base_directory Base directory to write chunks to. 
- ChunkWriter(BaseEncoder* encoder, - const ImageShape& image_shape, - const TileShape& tile_shape, - uint32_t lod, - uint32_t tile_col, - uint32_t tile_row, - uint32_t tile_plane, - uint64_t max_bytes_per_chunk, - char dimension_separator, - const std::string& base_directory); - ~ChunkWriter(); - - [[nodiscard]] bool write_frame(const TiledFrame& frame); - - const ImageShape& image_shape() const noexcept; - const TileShape& tile_shape() const noexcept; - - uint32_t frames_written() const; - - private: - BaseEncoder* const encoder_; - - const uint32_t tile_col_; - const uint32_t tile_row_; - const uint32_t tile_plane_; - - uint64_t bytes_per_chunk_; - uint32_t tiles_per_chunk_; - uint64_t bytes_written_; - - std::string base_dir_; - uint32_t layer_; - int current_chunk_; - char dimension_separator_; - std::optional current_file_; - - std::optional compressor_; - - std::mutex mutex_; - ImageShape image_shape_; - TileShape tile_shape_; - - std::vector buffer_; - - void open_chunk_file(); - void close_current_file(); - size_t write(const uint8_t* beg, const uint8_t* end); - void finalize_chunk(); - void rollover(); -}; -} // namespace acquire::sink::zarr -#endif // H_ACQUIRE_ZARR_CHUNK_WRITER_V0 diff --git a/src/common.cpp b/src/common.cpp new file mode 100644 index 00000000..01881f35 --- /dev/null +++ b/src/common.cpp @@ -0,0 +1,95 @@ +#include "common.hh" + +#include "platform.h" + +#include + +namespace common = acquire::sink::zarr::common; + +size_t +common::bytes_of_type(const SampleType& type) +{ + CHECK(type < SampleTypeCount); + static size_t table[SampleTypeCount]; // = { 1, 2, 1, 2, 4, 2, 2, 2 }; +#define XXX(s, b) table[(s)] = (b) + XXX(SampleType_u8, 1); + XXX(SampleType_u16, 2); + XXX(SampleType_i8, 1); + XXX(SampleType_i16, 2); + XXX(SampleType_f32, 4); + XXX(SampleType_u10, 2); + XXX(SampleType_u12, 2); + XXX(SampleType_u14, 2); +#undef XXX + return table[type]; +} + +size_t +common::bytes_per_tile(const ImageDims& tile_shape, const SampleType& type) +{ + return bytes_of_type(type) * tile_shape.rows * tile_shape.cols; +} + +size_t +common::frames_per_chunk(const ImageDims& tile_shape, + SampleType type, + uint64_t max_bytes_per_chunk) +{ + auto bpt = (float)bytes_per_tile(tile_shape, type); + if (0 == bpt) + return 0; + + return (size_t)std::floor((float)max_bytes_per_chunk / bpt); +} + +size_t +common::bytes_per_chunk(const ImageDims& tile_shape, + const SampleType& type, + uint64_t max_bytes_per_chunk) +{ + return bytes_per_tile(tile_shape, type) * + frames_per_chunk(tile_shape, type, max_bytes_per_chunk); +} + +const char* +common::sample_type_to_dtype(SampleType t) + +{ + static const char* table[] = { "u1", "u2", "i1", "i2", + "f4", "u2", "u2", "u2" }; + if (t < countof(table)) { + return table[t]; + } else { + throw std::runtime_error("Invalid sample type."); + } +} + +const char* +common::sample_type_to_string(SampleType t) noexcept +{ + static const char* table[] = { "u8", "u16", "i8", "i16", + "f32", "u16", "u16", "u16" }; + if (t < countof(table)) { + return table[t]; + } else { + return "unrecognized pixel type"; + } +} + +void +common::write_string(const std::string& path, const std::string& str) +{ + if (auto p = fs::path(path); !fs::exists(p.parent_path())) + fs::create_directories(p.parent_path()); + + struct file f = { 0 }; + auto is_ok = file_create(&f, path.c_str(), path.size()); + is_ok &= file_write(&f, // file + 0, // offset + (uint8_t*)str.c_str(), // cur + (uint8_t*)(str.c_str() + str.size()) // end + ); + EXPECT(is_ok, "Write to \"%s\" failed.", 
path.c_str()); + TRACE("Wrote %d bytes to \"%s\".", str.size(), path.c_str()); + file_close(&f); +} diff --git a/src/common.hh b/src/common.hh new file mode 100644 index 00000000..3a2f9d23 --- /dev/null +++ b/src/common.hh @@ -0,0 +1,62 @@ +#ifndef ACQUIRE_DRIVER_ZARR_COMMON_H +#define ACQUIRE_DRIVER_ZARR_COMMON_H + +#include "prelude.h" + +#include "device/props/components.h" + +#include + +namespace fs = std::filesystem; + +namespace acquire::sink::zarr { +struct ImageDims +{ + uint32_t cols; + uint32_t rows; + + friend bool operator<=(const ImageDims& lhs, const ImageDims& rhs) noexcept + { + return (lhs.cols <= rhs.cols) && (lhs.rows <= rhs.rows); + } +}; + +namespace common { +size_t +bytes_of_type(const SampleType& type); + +size_t +bytes_per_tile(const ImageDims& tile_shape, const SampleType& type); + +size_t +frames_per_chunk(const ImageDims& tile_shape, + SampleType type, + uint64_t max_bytes_per_chunk); + +size_t +bytes_per_chunk(const ImageDims& tile_shape, + const SampleType& type, + uint64_t max_bytes_per_chunk); + +/// \brief Get the Zarr dtype for a given SampleType. +/// \param t An enumerated sample type. +/// \throw std::runtime_error if \par t is not a valid SampleType. +/// \return A representation of the SampleType \par t expected by a Zarr reader. +const char* +sample_type_to_dtype(SampleType t); + +/// \brief Get a string representation of the SampleType enum. +/// \param t An enumerated sample type. +/// \return A human-readable representation of the SampleType \par t. +const char* +sample_type_to_string(SampleType t) noexcept; + +/// \brief Write a string to a file. +/// @param path The path of the file to write. +/// @param str The string to write. +void +write_string(const std::string& path, const std::string& value); +} // namespace acquire::sink::zarr::common +} // namespace acquire::sink::zarr + +#endif // ACQUIRE_DRIVER_ZARR_COMMON_H diff --git a/src/frame.scaler.cpp b/src/frame.scaler.cpp deleted file mode 100644 index 5e7eef6f..00000000 --- a/src/frame.scaler.cpp +++ /dev/null @@ -1,442 +0,0 @@ -#include "frame.scaler.hh" -#include "zarr.hh" - -#include -#include - -namespace { -namespace zarr = acquire::sink::zarr; - -size_t -bytes_of_type(const SampleType& type) -{ - CHECK(type < SampleTypeCount); - static size_t table[SampleTypeCount]; // = { 1, 2, 1, 2, 4, 2, 2, 2 }; -#define XXX(s, b) table[(s)] = (b) - XXX(SampleType_u8, 1); - XXX(SampleType_u16, 2); - XXX(SampleType_i8, 1); - XXX(SampleType_i16, 2); - XXX(SampleType_f32, 4); - XXX(SampleType_u10, 2); - XXX(SampleType_u12, 2); - XXX(SampleType_u14, 2); -#undef XXX - return table[type]; -} - -template -void -average_one_frame(std::shared_ptr dst, - std::shared_ptr src) -{ - CHECK(dst); - CHECK(src); - - const auto& src_shape = src->image_shape(); - const int downscale = 2; - const auto factor = 0.125f; - - const auto width = src_shape.dims.width; - const auto w_pad = width + (width % downscale); - - const auto height = src_shape.dims.height; - const auto h_pad = height + (height % downscale); - - const auto planes = src_shape.dims.planes; - const auto p_pad = planes > 1 ? 
planes + (planes % downscale) : 1; - - CHECK(dst->bytes_of_image() >= w_pad * h_pad * p_pad * factor * sizeof(T)); - - const auto* src_img = (T*)src->image(); - auto* dst_img = (T*)dst->data(); - - size_t dst_idx = 0; - for (auto plane = 0; plane < planes; plane += downscale) { - const bool pad_plane = (plane == planes - 1); - - for (auto row = 0; row < height; row += downscale) { - const bool pad_height = (row == height - 1 && height != h_pad); - - for (auto col = 0; col < width; col += downscale) { - const bool pad_width = (col == width - 1 && width != w_pad); - - size_t idx = plane * width * height + row * width + col; - dst_img[dst_idx++] = - (T)(factor * - ((float)src_img[idx] + - (float)src_img[idx + (1 - (int)pad_width)] + - (float)src_img[idx + width * (1 - (int)pad_height)] + - (float)src_img[idx + width * (1 - (int)pad_height) + - (1 - (int)pad_width)] + - (float) - src_img[idx + width * height * (1 - (int)pad_plane)] + - (float) - src_img[idx + width * height * (1 - (int)pad_plane) + - (1 - (int)pad_width)] + - (float) - src_img[idx + width * height * (1 - (int)pad_plane) + - width * (1 - (int)pad_height)] + - (float) - src_img[idx + width * height * (1 - (int)pad_plane) + - width * (1 - (int)pad_height) + - (1 - (int)pad_width)])); - } - } - } -} - -template -void -average_two_frames(std::shared_ptr dst, - std::shared_ptr src1, - std::shared_ptr src2) -{ - CHECK(dst); - CHECK(src1); - CHECK(src2); - - CHECK(dst->bytes_of_image() == src1->bytes_of_image() && - dst->bytes_of_image() == src2->bytes_of_image()); - - const float factor = 0.5f; - const size_t npx = dst->bytes_of_image() / sizeof(T); - - const auto* src1_img = (T*)src1->image(); - const auto* src2_img = (T*)src2->image(); - auto* dst_img = (T*)dst->data(); - - for (auto i = 0; i < npx; ++i) { - dst_img[i] = (T)(factor * ((float)src1_img[i] + (float)src2_img[i])); - } -} -} // :: namespace - -namespace acquire::sink::zarr { -ScalingParameters::ScalingParameters(const ImageShape& image_shape, - const TileShape& tile_shape) - : image_shape{ image_shape } - , tile_shape{ tile_shape } -{ -} - -FrameScaler::FrameScaler(Zarr* zarr, - const ImageShape& image_shape, - const TileShape& tile_shape) - : zarr_{ zarr } -{ - CHECK(zarr_); - scaling_params_ = make_scaling_parameters(image_shape, tile_shape); - for (int16_t i = 1; i < scaling_params_.size(); ++i) { - accumulators_.insert({ i, {} }); - } -} - -bool -FrameScaler::push_frame(std::shared_ptr frame) -{ - std::unique_lock lock(mutex_); - try { - zarr_->push_frame_to_writers(frame); - if (accumulators_.contains(1)) { - downsample_and_accumulate(frame, 1); - } - return true; - } catch (const std::exception& exc) { - LOGE("Exception: %s\n", exc.what()); - } catch (...) 
{ - LOGE("Exception: (unknown)"); - } - - return false; -} - -void -FrameScaler::downsample_and_accumulate(std::shared_ptr frame, - int16_t layer) -{ - std::vector>& accumulator = - accumulators_.at(layer); - - const ImageShape& image_shape = scaling_params_.at(layer - 1).image_shape; - auto dst = - std::make_shared(frame->frame_id(), - layer, - scaling_params_.at(layer).image_shape, - scaling_params_.at(layer).tile_shape); - - switch (image_shape.type) { - case SampleType_u10: - case SampleType_u12: - case SampleType_u14: - case SampleType_u16: - average_one_frame(dst, frame); - if (accumulator.size() == 1) { - auto averaged = std::make_shared(dst->frame_id(), - dst->layer(), - dst->image_shape(), - dst->tile_shape()); - average_two_frames( - averaged, accumulator.front(), dst); - accumulator.clear(); - - zarr_->push_frame_to_writers(averaged); - if (layer < scaling_params_.size() - 1) { - downsample_and_accumulate(averaged, layer + 1); - } - } else { - accumulator.push_back(dst); - } - break; - case SampleType_i8: - average_one_frame(dst, frame); - if (accumulator.size() == 1) { - auto averaged = std::make_shared(dst->frame_id(), - dst->layer(), - dst->image_shape(), - dst->tile_shape()); - average_two_frames(averaged, accumulator.front(), dst); - accumulator.clear(); - - zarr_->push_frame_to_writers(averaged); - if (layer < scaling_params_.size() - 1) { - downsample_and_accumulate(averaged, layer + 1); - } - } else { - accumulator.push_back(dst); - } - break; - case SampleType_i16: - average_one_frame(dst, frame); - if (accumulator.size() == 1) { - auto averaged = std::make_shared(dst->frame_id(), - dst->layer(), - dst->image_shape(), - dst->tile_shape()); - average_two_frames(averaged, accumulator.front(), dst); - accumulator.clear(); - - zarr_->push_frame_to_writers(averaged); - if (layer < scaling_params_.size() - 1) { - downsample_and_accumulate(averaged, layer + 1); - } - } else { - accumulator.push_back(dst); - } - break; - case SampleType_f32: - average_one_frame(dst, frame); - if (accumulator.size() == 1) { - auto averaged = std::make_shared(dst->frame_id(), - dst->layer(), - dst->image_shape(), - dst->tile_shape()); - average_two_frames(averaged, accumulator.front(), dst); - accumulator.clear(); - - zarr_->push_frame_to_writers(averaged); - if (layer < scaling_params_.size() - 1) { - downsample_and_accumulate(averaged, layer + 1); - } - } else { - accumulator.push_back(dst); - } - break; - case SampleType_u8: - default: - average_one_frame(dst, frame); - if (accumulator.size() == 1) { - auto averaged = std::make_shared(dst->frame_id(), - dst->layer(), - dst->image_shape(), - dst->tile_shape()); - average_two_frames(averaged, accumulator.front(), dst); - accumulator.clear(); - - zarr_->push_frame_to_writers(averaged); - if (layer < scaling_params_.size() - 1) { - downsample_and_accumulate(averaged, layer + 1); - } - } else { - accumulator.push_back(dst); - } - break; - } -} - -std::vector -make_scaling_parameters(const ImageShape& base_image_shape, - const TileShape& base_tile_shape) -{ - std::vector shapes; - shapes.emplace_back(base_image_shape, base_tile_shape); - - const int downscale = 2; - - uint32_t w = base_image_shape.dims.width; - uint32_t h = base_image_shape.dims.height; - - while (w > base_tile_shape.width || h > base_tile_shape.height) { - w = (w + (w % downscale)) / downscale; - h = (h + (h % downscale)) / downscale; - - ImageShape im_shape = base_image_shape; - im_shape.dims.width = w; - im_shape.dims.height = h; - im_shape.strides.width = 
im_shape.strides.channels; - im_shape.strides.height = im_shape.strides.width * w; - im_shape.strides.planes = im_shape.strides.height * h; - - TileShape tile_shape = base_tile_shape; - if (tile_shape.width > w) - tile_shape.width = w; - - if (tile_shape.height > h) - tile_shape.height = h; - - shapes.emplace_back(im_shape, tile_shape); - } - - return shapes; -} -} // namespace acquire::sink::zarr - -#ifndef NO_UNIT_TESTS - -#ifdef _WIN32 -#define acquire_export __declspec(dllexport) -#else -#define acquire_export -#endif - -///< Test that a single frame with 1 plane is padded and averaged correctly. -template -void -test_average_frame_inner(const SampleType& stype) -{ - ImageShape image_shape { - .dims = { - .channels = 1, - .width = 3, - .height = 3, - .planes = 1, - }, - .strides = { - .channels = 1, - .width = 1, - .height = 3, - .planes = 9 - }, - .type = stype - }; - zarr::TileShape tile_shape{ .width = 3, .height = 3, .planes = 1 }; - - auto src = - std::make_shared(0, 0, image_shape, tile_shape); - for (auto i = 0; i < 9; ++i) { - ((T*)src->data())[i] = (T)(i + 1); - } - - image_shape.dims = { .channels = 1, .width = 2, .height = 2, .planes = 1 }; - image_shape.strides = { - .channels = 1, .width = 1, .height = 2, .planes = 4 - }; - tile_shape = { - .width = 2, - .height = 2, - .planes = 1, - }; - - auto dst = - std::make_shared(0, 0, image_shape, tile_shape); - - average_one_frame(dst, src); - CHECK(((T*)dst->image())[0] == (T)3); - CHECK(((T*)dst->image())[1] == (T)4.5); - CHECK(((T*)dst->image())[2] == (T)7.5); - CHECK(((T*)dst->image())[3] == (T)9); -} - -///< Test that a single frame with 3 planes is padded and averaged correctly. -template -void -test_average_planes_inner(const SampleType& stype) -{ - ImageShape image_shape { - .dims = { - .channels = 1, - .width = 4, - .height = 4, - .planes = 3, - }, - .strides = { - .channels = 1, - .width = 1, - .height = 4, - .planes = 16 - }, - .type = stype - }; - zarr::TileShape tile_shape{ .width = 4, .height = 4, .planes = 1 }; - - auto src = - std::make_shared(0, 0, image_shape, tile_shape); - for (auto i = 0; i < 48; ++i) { - ((T*)src->data())[i] = (T)(i + 1); - } - - image_shape.dims = { .channels = 1, .width = 2, .height = 2, .planes = 2 }; - image_shape.strides = { - .channels = 1, .width = 1, .height = 2, .planes = 4 - }; - tile_shape = { - .width = 2, - .height = 2, - .planes = 2, - }; - - auto dst = - std::make_shared(0, 0, image_shape, tile_shape); - - average_one_frame(dst, src); - CHECK(((T*)dst->image())[0] == (T)11.5); - CHECK(((T*)dst->image())[1] == (T)13.5); - CHECK(((T*)dst->image())[2] == (T)19.5); - CHECK(((T*)dst->image())[3] == (T)21.5); - CHECK(((T*)dst->image())[4] == (T)35.5); - CHECK(((T*)dst->image())[5] == (T)37.5); - CHECK(((T*)dst->image())[6] == (T)43.5); - CHECK(((T*)dst->image())[7] == (T)45.5); -} - -extern "C" -{ - acquire_export int unit_test__average_frame() - { - try { - test_average_frame_inner(SampleType_u8); - test_average_planes_inner(SampleType_u8); - - test_average_frame_inner(SampleType_i8); - test_average_planes_inner(SampleType_i8); - - test_average_frame_inner(SampleType_u16); - test_average_planes_inner(SampleType_u16); - - test_average_frame_inner(SampleType_i16); - test_average_planes_inner(SampleType_i16); - - test_average_frame_inner(SampleType_f32); - test_average_planes_inner(SampleType_f32); - } catch (const std::exception& exc) { - LOGE("Exception: %s\n", exc.what()); - return 0; - } catch (...) 
{ - LOGE("Exception: (unknown)"); - return 0; - } - - return 1; - } -} -#endif diff --git a/src/frame.scaler.hh b/src/frame.scaler.hh deleted file mode 100644 index 7e533a26..00000000 --- a/src/frame.scaler.hh +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef H_ACQUIRE_ZARR_FRAME_SCALER_V0 -#define H_ACQUIRE_ZARR_FRAME_SCALER_V0 - -#ifdef __cplusplus - -#include -#include -#include -#include -#include - -#include "prelude.h" -#include "tiled.frame.hh" -#include "chunk.writer.hh" - -namespace acquire::sink::zarr { -class Zarr; - -struct ScalingParameters -{ - ImageShape image_shape; - TileShape tile_shape; - ScalingParameters(const ImageShape& image_shape, - const TileShape& tile_shape); -}; - -struct FrameScaler final -{ - public: - FrameScaler() = delete; - FrameScaler(Zarr* zarr, - const ImageShape& image_shape, - const TileShape& tile_shape); - FrameScaler(const FrameScaler&) = delete; - ~FrameScaler() = default; - - [[nodiscard]] bool push_frame(std::shared_ptr frame); - - private: - Zarr* zarr_; // non-owning - - std::vector scaling_params_; - - // Accumulate downsampled layers until we have enough to average and write. - std::unordered_map>> - accumulators_; - - mutable std::mutex mutex_; - - void downsample_and_accumulate(std::shared_ptr frame, - int16_t layer); -}; - -std::vector -make_scaling_parameters(const ImageShape& base_image_shape, - const TileShape& base_tile_shape); -} // namespace acquire::sink::zarr - -#endif // __cplusplus -#endif // H_ACQUIRE_ZARR_FRAME_SCALER_V0 diff --git a/src/tiled.frame.cpp b/src/tiled.frame.cpp deleted file mode 100644 index 946a3850..00000000 --- a/src/tiled.frame.cpp +++ /dev/null @@ -1,186 +0,0 @@ -#include "tiled.frame.hh" -#include "zarr.hh" - -#include -#include -#include - -#include "device/props/components.h" - -namespace zarr = acquire::sink::zarr; - -namespace { -size_t -bytes_of_type(const SampleType& type) -{ - CHECK(type < SampleTypeCount); - static size_t table[SampleTypeCount]; // = { 1, 2, 1, 2, 4, 2, 2, 2 }; -#define XXX(s, b) table[(s)] = (b) - XXX(SampleType_u8, 1); - XXX(SampleType_u16, 2); - XXX(SampleType_i8, 1); - XXX(SampleType_i16, 2); - XXX(SampleType_f32, 4); - XXX(SampleType_u10, 2); - XXX(SampleType_u12, 2); - XXX(SampleType_u14, 2); -#undef XXX - return table[type]; -} - -size_t -bytes_per_tile(const ImageShape& image, const zarr::TileShape& tile) -{ - return bytes_of_type(image.type) * image.dims.channels * tile.width * - tile.height * tile.planes; -} -} // ::{anonymous} - -namespace acquire::sink::zarr { -TiledFrame::TiledFrame(const VideoFrame* frame, - const ImageShape& image_shape, - const TileShape& tile_shape) - : bytes_of_image_{ 0 } - , frame_id_{ 0 } - , layer_{ 0 } - , image_shape_{ image_shape } - , tile_shape_{ tile_shape } -{ - CHECK(frame); - CHECK(frame->data); - - bytes_of_image_ = frame->bytes_of_frame - sizeof(*frame); - CHECK(bytes_of_image_ > 0); - - buf_.resize(bytes_of_image_); - memcpy(buf_.data(), frame->data, bytes_of_image_); - - frame_id_ = frame->frame_id; -} - -TiledFrame::TiledFrame(uint64_t frame_id, - size_t layer, - const ImageShape& image_shape, - const TileShape& tile_shape) - : bytes_of_image_{ get_bytes_per_frame(image_shape) } - , frame_id_{ frame_id } - , layer_{ layer } - , image_shape_{ image_shape } - , tile_shape_{ tile_shape } -{ - buf_.resize(bytes_of_image_); - std::fill_n(buf_.begin(), bytes_of_image_, 0); -} - -size_t -TiledFrame::bytes_of_image() const -{ - return bytes_of_image_; -} - -const ImageShape& -TiledFrame::image_shape() const -{ - return image_shape_; -} - -const 
TileShape& -TiledFrame::tile_shape() const -{ - return tile_shape_; -} - -uint64_t -TiledFrame::frame_id() const -{ - return frame_id_; -} - -size_t -TiledFrame::layer() const -{ - return layer_; -} - -const uint8_t* -TiledFrame::image() const -{ - return buf_.data(); -} - -uint8_t* -TiledFrame::data() -{ - return buf_.data(); -} - -size_t -TiledFrame::copy_tile(uint8_t* tile, - size_t bytes_of_tile, - uint32_t tile_col, - uint32_t tile_row, - uint32_t tile_plane) const -{ - CHECK(tile); - CHECK(bytes_of_tile >= bytes_per_tile(image_shape_, tile_shape_)); - memset(tile, 0, bytes_of_tile); - - uint8_t* region = nullptr; - - const size_t bytes_per_row = bytes_of_type(image_shape_.type) * - image_shape_.dims.channels * tile_shape_.width; - - size_t offset = 0; - uint32_t frame_col = - tile_col * tile_shape_.width * image_shape_.dims.channels; - for (auto p = 0; p < tile_shape_.planes; ++p) { - size_t frame_plane = tile_plane * tile_shape_.planes + p; - for (auto r = 0; r < tile_shape_.height; ++r) { - uint32_t frame_row = tile_row * tile_shape_.height + r; - - size_t nbytes_row = - get_contiguous_region(®ion, frame_col, frame_row, frame_plane); - - // copy frame data into the tile buffer - if (0 < nbytes_row) { - CHECK(nullptr != region); - memcpy(tile + offset, region, nbytes_row); - } - - offset += bytes_per_row; - } - } - - return offset; -} - -size_t -TiledFrame::get_contiguous_region(uint8_t** region, - size_t frame_col, - size_t frame_row, - size_t frame_plane) const -{ - size_t nbytes = 0; - - auto* data = const_cast(buf_.data()); - - if (frame_row >= image_shape_.dims.height || - frame_plane >= image_shape_.dims.planes) { - *region = nullptr; - } else { - size_t frame_offset = - bytes_of_type(image_shape_.type) * - (frame_col + frame_row * image_shape_.strides.height + - frame_plane * image_shape_.strides.planes); - // widths are in pixels - size_t img_width = image_shape_.dims.width; - size_t tile_width = tile_shape_.width; - size_t region_width = - std::min(frame_col + tile_width, img_width) - frame_col; - nbytes = region_width * bytes_of_type(image_shape_.type); - *region = data + frame_offset; - } - - return nbytes; -} -} // acquire::sink::zarr diff --git a/src/tiled.frame.hh b/src/tiled.frame.hh deleted file mode 100644 index 66b17f61..00000000 --- a/src/tiled.frame.hh +++ /dev/null @@ -1,83 +0,0 @@ -#ifndef H_ACQUIRE_STORAGE_ZARR_TILED_FRAME_V0 -#define H_ACQUIRE_STORAGE_ZARR_TILED_FRAME_V0 - -#include -#include -#include -#include -#include -#include -#include - -#include "device/props/components.h" - -#include "prelude.h" - -namespace acquire::sink::zarr { -struct TileShape -{ - uint32_t width, height, planes; -}; - -class TiledFrame -{ - public: - TiledFrame() = delete; - TiledFrame(const VideoFrame* frame, - const ImageShape&, - const TileShape& tile_shape); - TiledFrame(uint64_t frame_id, - size_t layer, - const ImageShape& image_shape, - const TileShape& tile_shape); - TiledFrame(const TiledFrame&) = delete; - ~TiledFrame() = default; - - size_t bytes_of_image() const; - const ImageShape& image_shape() const; - const TileShape& tile_shape() const; - - uint64_t frame_id() const; - size_t layer() const; - const uint8_t* image() const; - uint8_t* data(); - - /// @brief Copy the tile indexed by @p tile_col, @p tile_row, and - /// @p tile_plane into the buffer at @p tile. - /// @param tile[out] Buffer to copy tile into. - /// @param bytes_of_tile[in] Size of @p tile. - /// @param tile_col[in] The column index, in tile space, of the tile. 
- /// @param tile_row[in] The row index, in tile space, of the tile. - /// @param tile_plane[in] The plane index, in tile space, of the tile. - /// @return The number of bytes written to @p tile. Should be exactly the - /// number of bytes in a tile. - [[nodiscard]] size_t copy_tile(uint8_t* tile, - size_t bytes_of_tile, - uint32_t tile_col, - uint32_t tile_row, - uint32_t tile_plane) const; - - private: - size_t bytes_of_image_; - uint64_t frame_id_; - size_t layer_; - std::vector buf_; - ImageShape image_shape_; - TileShape tile_shape_; - - /// @brief Get a pointer to the contiguous region determined by - /// @p frame_col, @p frame_row, and @p frame_plane, as well as the - /// number of bytes - /// @param region[out] Pointer to pointer to contiguous region. - /// @param frame_col[in] The column index, in the frame, of the region. - /// @param frame_row[in] The row index, in the frame, of the region. - /// @param frame_plane[in] The plane index, in the frame, of the region. - /// @return The number of bytes pointed to by @p *region. - [[nodiscard]] size_t get_contiguous_region(uint8_t** region, - size_t frame_col, - size_t frame_row, - size_t frame_plane) const; -}; -} // acquire::sink::zarr - -#endif // H_ACQUIRE_STORAGE_ZARR_TILED_FRAME_V0 diff --git a/src/writers/blosc.compressor.cpp b/src/writers/blosc.compressor.cpp new file mode 100644 index 00000000..80b6f4eb --- /dev/null +++ b/src/writers/blosc.compressor.cpp @@ -0,0 +1,37 @@ +#include "blosc.compressor.hh" + +namespace zarr = acquire::sink::zarr; +using json = nlohmann::json; + +zarr::BloscCompressionParams::BloscCompressionParams() + : clevel{ 1 } + , shuffle{ 1 } +{ +} + +zarr::BloscCompressionParams::BloscCompressionParams( + const std::string& codec_id, + int clevel, + int shuffle) + : codec_id{ codec_id } + , clevel{ clevel } + , shuffle{ shuffle } +{ +} + +void +zarr::to_json(json& j, const zarr::BloscCompressionParams& bcp) +{ + j = json{ { "id", std::string(bcp.id) }, + { "cname", bcp.codec_id }, + { "clevel", bcp.clevel }, + { "shuffle", bcp.shuffle } }; +} + +void +zarr::from_json(const json& j, zarr::BloscCompressionParams& bcp) +{ + j.at("cname").get_to(bcp.codec_id); + j.at("clevel").get_to(bcp.clevel); + j.at("shuffle").get_to(bcp.shuffle); +} \ No newline at end of file diff --git a/src/writers/blosc.compressor.hh b/src/writers/blosc.compressor.hh new file mode 100644 index 00000000..1736cea2 --- /dev/null +++ b/src/writers/blosc.compressor.hh @@ -0,0 +1,56 @@ +#ifndef H_ACQUIRE_ZARR_BLOSC_COMPRESSOR_V0 +#define H_ACQUIRE_ZARR_BLOSC_COMPRESSOR_V0 + +#ifndef __cplusplus +#error "This header requires C++20" +#endif + +#include "blosc.h" +#include "json.hpp" + +namespace acquire::sink::zarr { +enum class BloscCodecId : uint8_t +{ + Lz4 = BLOSC_LZ4, + Zstd = BLOSC_ZSTD +}; + +template +constexpr const char* +compression_codec_as_string(); + +template<> +constexpr const char* +compression_codec_as_string() +{ + return "zstd"; +} + +template<> +constexpr const char* +compression_codec_as_string() +{ + return "lz4"; +} + +struct BloscCompressionParams +{ + static constexpr char id[] = "blosc"; + std::string codec_id; + int clevel; + int shuffle; + + BloscCompressionParams(); + BloscCompressionParams(const std::string& codec_id, + int clevel, + int shuffle); +}; + +void +to_json(nlohmann::json&, const BloscCompressionParams&); + +void +from_json(const nlohmann::json&, BloscCompressionParams&); +} + +#endif // H_ACQUIRE_ZARR_BLOSC_COMPRESSOR_V0 diff --git a/src/writers/chunk.writer.cpp b/src/writers/chunk.writer.cpp new 
file mode 100644 index 00000000..f360bab2 --- /dev/null +++ b/src/writers/chunk.writer.cpp @@ -0,0 +1,215 @@ +#include "chunk.writer.hh" +#include "../zarr.hh" + +#include +#include + +namespace zarr = acquire::sink::zarr; + +zarr::ChunkWriter::ChunkWriter(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr) + : Writer(frame_dims, tile_dims, frames_per_chunk, data_root, zarr) +{ +} + +zarr::ChunkWriter::ChunkWriter(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr, + const BloscCompressionParams& compression_params) + : Writer(frame_dims, + tile_dims, + frames_per_chunk, + data_root, + zarr, + compression_params) +{ +} + +bool +zarr::ChunkWriter::write(const VideoFrame* frame) noexcept +{ + using namespace std::chrono_literals; + + if (!validate_frame_(frame)) { + // log is written in validate_frame + return false; + } + + try { + if (chunk_buffers_.empty()) { + make_buffers_(); + } + + // write out + bytes_to_flush_ += + write_bytes_(frame->data, frame->bytes_of_frame - sizeof(*frame)); + + ++frames_written_; + + // rollover if necessary + const auto frames_this_chunk = frames_written_ % frames_per_chunk_; + if (frames_written_ > 0 && frames_this_chunk == 0) { + flush_(); + rollover_(); + } + return true; + } catch (const std::exception& exc) { + char buf[128]; + snprintf(buf, sizeof(buf), "Failed to write frame: %s", exc.what()); + zarr_->set_error(buf); + } catch (...) { + char buf[32]; + snprintf(buf, sizeof(buf), "Failed to write frame (unknown)"); + zarr_->set_error(buf); + } + + return false; +} + +void +zarr::ChunkWriter::make_buffers_() noexcept +{ + const auto nchunks = tiles_per_frame_(); + chunk_buffers_.resize(nchunks); + buffers_ready_ = new bool[nchunks]; + std::fill(buffers_ready_, buffers_ready_ + nchunks, true); + + const auto bytes_of_type = common::bytes_of_type(pixel_type_); + const auto bytes_per_tile = + tile_dims_.cols * tile_dims_.rows * bytes_of_type; + + for (auto i = 0; i < chunk_buffers_.size(); ++i) { + auto& buf = chunk_buffers_.at(i); + buf.resize(frames_per_chunk_ * bytes_per_tile); + std::fill(buf.begin(), buf.end(), 0); + } +} + +size_t +zarr::ChunkWriter::write_bytes_(const uint8_t* buf, size_t buf_size) noexcept +{ + const auto bytes_of_type = common::bytes_of_type(pixel_type_); + const auto bytes_per_tile = + tile_dims_.cols * tile_dims_.rows * bytes_of_type; + const auto frames_this_chunk = frames_written_ % frames_per_chunk_; + + size_t bytes_written = 0; + + for (auto i = 0; i < tiles_per_frame_y_; ++i) { + for (auto j = 0; j < tiles_per_frame_x_; ++j) { + size_t offset = bytes_per_tile * frames_this_chunk; + + uint8_t* bytes_out = + chunk_buffers_.at(i * tiles_per_frame_x_ + j).data(); + for (auto k = 0; k < tile_dims_.rows; ++k) { + const auto frame_row = i * tile_dims_.rows + k; + if (frame_row < frame_dims_.rows) { + const auto frame_col = j * tile_dims_.cols; + + const auto buf_offset = + bytes_of_type * + (frame_row * frame_dims_.cols + frame_col); + + const auto region_width = + std::min(frame_col + tile_dims_.cols, frame_dims_.cols) - + frame_col; + + const auto nbytes = region_width * bytes_of_type; + memcpy(bytes_out + offset, buf + buf_offset, nbytes); + } + offset += tile_dims_.cols * bytes_of_type; + } + bytes_written += bytes_per_tile; + } + } + + return bytes_written; +} + +void +zarr::ChunkWriter::flush_() noexcept +{ + if (bytes_to_flush_ == 0) { + return; + } + + using 
namespace std::chrono_literals; + const auto bytes_of_type = common::bytes_of_type(pixel_type_); + const auto bytes_per_tile = + tile_dims_.cols * tile_dims_.rows * bytes_of_type; + if (bytes_to_flush_ % bytes_per_tile != 0) { + LOGE("Expected bytes to flush to be a multiple of the " + "number of bytes per tile."); + } + + // create chunk files if necessary + if (files_.empty() && !make_files_()) { + zarr_->set_error("Failed to flush."); + return; + } + + // compress buffers and write out + auto buf_sizes = compress_buffers_(); + std::fill(buffers_ready_, buffers_ready_ + chunk_buffers_.size(), false); + { + std::scoped_lock lock(mutex_); + for (auto i = 0; i < files_.size(); ++i) { + auto& buf = chunk_buffers_.at(i); + zarr_->push_to_job_queue(std::move( + [fh = &files_.at(i), + data = buf.data(), + size = buf_sizes.at(i), + finished = buffers_ready_ + i](std::string& err) -> bool { + bool success = false; + try { + success = file_write(fh, 0, data, data + size); + } catch (const std::exception& exc) { + char buf[128]; + snprintf(buf, + sizeof(buf), + "Failed to write chunk: %s", + exc.what()); + err = buf; + } catch (...) { + err = "Unknown error"; + } + *finished = true; + + return success; + })); + } + } + + // wait for all threads to finish + while (!std::all_of(buffers_ready_, + buffers_ready_ + chunk_buffers_.size(), + [](const auto& b) { return b; })) { + std::this_thread::sleep_for(500us); + } + + // reset buffers + const auto bytes_per_chunk = + tile_dims_.cols * tile_dims_.rows * bytes_of_type * frames_per_chunk_; + for (auto& buf : chunk_buffers_) { + // absurd edge case we need to account for + if (buf.size() > bytes_per_chunk) { + buf.resize(bytes_per_chunk); + } + + std::fill(buf.begin(), buf.end(), 0); + } + bytes_to_flush_ = 0; +} + +bool +zarr::ChunkWriter::make_files_() noexcept +{ + file_creator_.set_base_dir(data_root_ / std::to_string(current_chunk_)); + return file_creator_.create( + 1, tiles_per_frame_y_, tiles_per_frame_x_, files_); +} diff --git a/src/writers/chunk.writer.hh b/src/writers/chunk.writer.hh new file mode 100644 index 00000000..fae0b92c --- /dev/null +++ b/src/writers/chunk.writer.hh @@ -0,0 +1,52 @@ +#ifndef H_ACQUIRE_ZARR_CHUNK_WRITER_V0 +#define H_ACQUIRE_ZARR_CHUNK_WRITER_V0 + +#ifndef __cplusplus +#error "This header requires C++20" +#endif + +#include "writer.hh" + +#include "platform.h" +#include "device/props/components.h" + +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace acquire::sink::zarr { +struct ChunkWriter final : public Writer +{ + public: + ChunkWriter() = delete; + ChunkWriter(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr); + + /// Constructor with Blosc compression params + ChunkWriter(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr, + const BloscCompressionParams& compression_params); + ~ChunkWriter() override = default; + + [[nodiscard]] bool write(const VideoFrame* frame) noexcept override; + + private: + void make_buffers_() noexcept override; + size_t write_bytes_(const uint8_t* buf, size_t buf_size) noexcept override; + void flush_() noexcept override; + [[nodiscard]] bool make_files_() noexcept override; +}; +} // namespace acquire::sink::zarr + +#endif // H_ACQUIRE_ZARR_CHUNK_WRITER_V0 diff --git a/src/writers/shard.writer.cpp b/src/writers/shard.writer.cpp new file mode 100644 index 
00000000..9449fa0c --- /dev/null +++ b/src/writers/shard.writer.cpp @@ -0,0 +1,292 @@ +#include "shard.writer.hh" +#include "../zarr.hh" + +#include +#include + +namespace zarr = acquire::sink::zarr; + +zarr::ShardWriter::ShardWriter(const ImageDims& frame_dims, + const ImageDims& shard_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr) + : Writer(frame_dims, tile_dims, frames_per_chunk, data_root, zarr) + , shard_dims_{ shard_dims } +{ + shards_per_frame_x_ = + std::ceil((float)frame_dims.cols / (float)shard_dims.cols); + shards_per_frame_y_ = + std::ceil((float)frame_dims.rows / (float)shard_dims.rows); +} + +zarr::ShardWriter::ShardWriter(const ImageDims& frame_dims, + const ImageDims& shard_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr, + const BloscCompressionParams& compression_params) + : Writer(frame_dims, + tile_dims, + frames_per_chunk, + data_root, + zarr, + compression_params) + , shard_dims_{ shard_dims } +{ + shards_per_frame_x_ = + std::ceil((float)frame_dims.cols / (float)shard_dims.cols); + shards_per_frame_y_ = + std::ceil((float)frame_dims.rows / (float)shard_dims.rows); +} + +bool +zarr::ShardWriter::write(const VideoFrame* frame) noexcept +{ + using namespace std::chrono_literals; + + if (!validate_frame_(frame)) { + // log is written in validate_frame + return false; + } + + try { + if (chunk_buffers_.empty()) { + make_buffers_(); + } + + bytes_to_flush_ += + write_bytes_(frame->data, frame->bytes_of_frame - sizeof(*frame)); + + ++frames_written_; + + // rollover if necessary + const auto frames_this_chunk = frames_written_ % frames_per_chunk_; + if (frames_written_ > 0 && frames_this_chunk == 0) { + flush_(); + rollover_(); + } + + return true; + } catch (const std::exception& exc) { + LOGE("Failed to write frame: %s", exc.what()); + } catch (...) 
{ + LOGE("Failed to write frame (unknown)"); + } + + return false; +} + +uint16_t +zarr::ShardWriter::chunks_per_shard_() const +{ + const uint16_t chunks_per_shard_x = shard_dims_.cols / tile_dims_.cols; + const uint16_t chunks_per_shard_y = shard_dims_.rows / tile_dims_.rows; + return chunks_per_shard_x * chunks_per_shard_y; +} + +uint16_t +zarr::ShardWriter::shards_per_frame_() const +{ + return shards_per_frame_x_ * shards_per_frame_y_; +} + +void +zarr::ShardWriter::make_buffers_() noexcept +{ + const auto nchunks = tiles_per_frame_(); + chunk_buffers_.resize(nchunks); + buffers_ready_ = new bool[nchunks]; + std::fill(buffers_ready_, buffers_ready_ + nchunks, true); + + const auto bytes_of_type = common::bytes_of_type(pixel_type_); + const auto bytes_per_tile = + tile_dims_.cols * tile_dims_.rows * bytes_of_type; + const auto bytes_per_chunk = bytes_per_tile * frames_per_chunk_; + + for (auto& buf : chunk_buffers_) { + buf.resize(frames_per_chunk_ * bytes_per_tile); + std::fill(buf.begin(), buf.end(), 0); + } + + const auto nshards = shards_per_frame_(); + shard_buffers_.resize(nshards); + + for (auto& buf : shard_buffers_) { + buf.resize(chunks_per_shard_() * bytes_per_chunk // data + + 2 * chunks_per_shard_() * sizeof(uint64_t) // indices + ); + } +} + +size_t +zarr::ShardWriter::write_bytes_(const uint8_t* buf, size_t buf_size) noexcept +{ + const auto bytes_of_type = common::bytes_of_type(pixel_type_); + const auto bytes_per_tile = + tile_dims_.cols * tile_dims_.rows * bytes_of_type; + const auto frames_this_chunk = frames_written_ % frames_per_chunk_; + + size_t bytes_written = 0; + + for (auto i = 0; i < tiles_per_frame_y_; ++i) { + for (auto j = 0; j < tiles_per_frame_x_; ++j) { + size_t offset = bytes_per_tile * frames_this_chunk; + + uint8_t* bytes_out = + chunk_buffers_.at(i * tiles_per_frame_x_ + j).data(); + for (auto k = 0; k < tile_dims_.rows; ++k) { + const auto frame_row = i * tile_dims_.rows + k; + if (frame_row < frame_dims_.rows) { + const auto frame_col = j * tile_dims_.cols; + + const auto buf_offset = + bytes_of_type * + (frame_row * frame_dims_.cols + frame_col); + + const auto region_width = + std::min(frame_col + tile_dims_.cols, frame_dims_.cols) - + frame_col; + + const auto nbytes = region_width * bytes_of_type; + memcpy(bytes_out + offset, buf + buf_offset, nbytes); + } + offset += tile_dims_.cols * bytes_of_type; + } + bytes_written += bytes_per_tile; + } + } + + return bytes_written; +} + +void +zarr::ShardWriter::flush_() noexcept +{ + if (bytes_to_flush_ == 0) { + return; + } + + using namespace std::chrono_literals; + const auto bytes_of_type = common::bytes_of_type(pixel_type_); + const auto bytes_per_tile = + tile_dims_.cols * tile_dims_.rows * bytes_of_type; + if (bytes_to_flush_ % bytes_per_tile != 0) { + LOGE("Expected bytes to flush to be a multiple of the " + "number of bytes per tile."); + } + const auto chunks_per_shard = chunks_per_shard_(); + + // create shard files if necessary + if (files_.empty() && !make_files_()) { + zarr_->set_error("Failed to flush."); + return; + } + + // compress buffers + auto chunk_sizes = compress_buffers_(); + const size_t index_size = 2 * chunks_per_shard * sizeof(uint64_t); + + // concatenate chunks into shards + std::vector shard_sizes; + for (auto i = 0; i < shard_buffers_.size(); ++i) { + auto& shard = shard_buffers_.at(i); + size_t shard_size = 0; + std::vector chunk_indices; + + for (auto j = 0; j < chunks_per_shard; ++j) { + chunk_indices.push_back(shard_size); // chunk index + const auto k = i * 
chunks_per_shard + j; + shard_size += chunk_sizes.at(k); + chunk_indices.push_back(chunk_sizes.at(k)); // chunk extent + } + + // if we're very unlucky we can technically run into this + if (shard.size() < shard_size + index_size) { + shard.resize(shard_size + index_size); + } + + size_t offset = 0; + for (auto j = 0; j < chunks_per_shard; ++j) { + const auto k = i * chunks_per_shard + j; + const auto& chunk = chunk_buffers_.at(k); + memcpy(shard.data() + offset, chunk.data(), chunk_sizes.at(k)); + offset += chunk_sizes.at(k); + } + memcpy(shard.data() + offset, + chunk_indices.data(), + chunk_indices.size() * 8); + offset += chunk_indices.size() * 8; + shard_sizes.push_back(offset); + } + + // write out + std::fill(buffers_ready_, buffers_ready_ + shard_buffers_.size(), false); + { + std::scoped_lock lock(mutex_); + for (auto i = 0; i < files_.size(); ++i) { + const auto& shard = shard_buffers_.at(i); + zarr_->push_to_job_queue(std::move( + [fh = &files_.at(i), + shard = shard.data(), + size = shard_sizes.at(i), + finished = buffers_ready_ + i](std::string& err) -> bool { + bool success = false; + try { + success = file_write(fh, 0, shard, shard + size); + } catch (const std::exception& exc) { + char buf[128]; + snprintf(buf, + sizeof(buf), + "Failed to write shard: %s", + exc.what()); + err = buf; + } catch (...) { + err = "Failed to write shard (unknown)"; + } + *finished = true; + + return success; + })); + } + } + + // wait for all threads to finish + while (!std::all_of(buffers_ready_, + buffers_ready_ + shard_buffers_.size(), + [](const auto& b) { return b; })) { + std::this_thread::sleep_for(500us); + } + + // reset buffers + const auto bytes_per_chunk = + tile_dims_.cols * tile_dims_.rows * bytes_of_type * frames_per_chunk_; + for (auto& buf : chunk_buffers_) { + // absurd edge case we need to account for + if (buf.size() > bytes_per_chunk) { + buf.resize(bytes_per_chunk); + } + + std::fill(buf.begin(), buf.end(), 0); + } + const auto bytes_per_shard = bytes_per_chunk * chunks_per_shard; + for (auto& buf : shard_buffers_) { + // absurd edge case we need to account for + if (buf.size() > bytes_per_shard + index_size) { + buf.resize(bytes_per_shard + index_size); + } + + std::fill(buf.begin(), buf.end(), 0); + } + bytes_to_flush_ = 0; +} + +bool +zarr::ShardWriter::make_files_() noexcept +{ + file_creator_.set_base_dir(data_root_ / ("c" + std::to_string(current_chunk_))); + return file_creator_.create( + 1, shards_per_frame_y_, shards_per_frame_x_, files_); +} diff --git a/src/writers/shard.writer.hh b/src/writers/shard.writer.hh new file mode 100644 index 00000000..0d05edb3 --- /dev/null +++ b/src/writers/shard.writer.hh @@ -0,0 +1,63 @@ +#ifndef H_ACQUIRE_ZARR_SHARD_WRITER_V0 +#define H_ACQUIRE_ZARR_SHARD_WRITER_V0 + +#ifndef __cplusplus +#error "This header requires C++20" +#endif + +#include "writer.hh" + +#include "platform.h" +#include "device/props/components.h" + +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace acquire::sink::zarr { +struct ShardWriter final : public Writer +{ + public: + ShardWriter() = delete; + ShardWriter(const ImageDims& frame_dims, + const ImageDims& shard_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr); + + /// Constructor with Blosc compression params + ShardWriter(const ImageDims& frame_dims, + const ImageDims& shard_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr, + 
const BloscCompressionParams& compression_params); + ~ShardWriter() override = default; + + [[nodiscard]] bool write(const VideoFrame* frame) noexcept override; + + private: + ImageDims shard_dims_; + uint16_t shards_per_frame_x_; + uint16_t shards_per_frame_y_; + + std::vector> shard_buffers_; + + uint16_t chunks_per_shard_() const; + uint16_t shards_per_frame_() const; + + void make_buffers_() noexcept override; + size_t write_bytes_(const uint8_t* buf, size_t buf_size) noexcept override; + void flush_() noexcept override; + [[nodiscard]] bool make_files_() noexcept override; +}; +} // namespace acquire::sink::zarr + +#endif // H_ACQUIRE_ZARR_SHARD_WRITER_V0 diff --git a/src/writers/writer.cpp b/src/writers/writer.cpp new file mode 100644 index 00000000..c65e4b2d --- /dev/null +++ b/src/writers/writer.cpp @@ -0,0 +1,379 @@ +#include +#include "writer.hh" +#include "../zarr.hh" + +#include +#include + +namespace zarr = acquire::sink::zarr; + +/// DirectoryCreator +zarr::FileCreator::FileCreator(Zarr* zarr) + : zarr_{ zarr } +{ +} + +void +zarr::FileCreator::set_base_dir(const fs::path& base_dir) noexcept +{ + base_dir_ = base_dir; + fs::create_directories(base_dir_); +} + +bool +zarr::FileCreator::create(int n_c, + int n_y, + int n_x, + std::vector& files) noexcept +{ + using namespace std::chrono_literals; + + std::vector> mutexes; + for (auto i = 0; i < n_c; ++i) { + mutexes.push_back(std::make_shared()); + } + + files.resize(n_c * n_y * n_x); + std::vector finished(n_c * n_y, 0); + + // until we support more than one channel, n_c will always be 1 + for (auto c = 0; c < n_c; ++c) { + // create the channel directory + zarr_->push_to_job_queue( + [base = base_dir_, mtx = mutexes.at(c), c](std::string& err) -> bool { + try { + std::scoped_lock lock(*mtx); + const auto path = base / std::to_string(c); + if (fs::exists(path)) { + EXPECT(fs::is_directory(path), + "%s must be a directory.", + path.c_str()); + } else { + EXPECT(fs::create_directories(path), + "Failed to create directory: %s", + path.c_str()); + } + } catch (const std::exception& exc) { + char buf[128]; + snprintf(buf, + sizeof(buf), + "Failed to create directory: %s", + exc.what()); + err = buf; + return false; + } catch (...) { + err = "Failed to create directory (unknown)"; + return false; + } + return true; + }); + + for (auto y = 0; y < n_y; ++y) { + zarr_->push_to_job_queue( + [base = base_dir_, + files = files.data() + c * n_y * n_x + y * n_x, + mtx = mutexes.at(c), + c, + y, + n_x, + done = finished.data() + c * n_y + y](std::string& err) -> bool { + bool success = false; + try { + auto path = base / std::to_string(c); + { + std::unique_lock lock(*mtx); + while (!fs::exists(path)) { + lock.unlock(); + std::this_thread::sleep_for(1ms); + lock.lock(); + } + } + + path /= std::to_string(y); + + if (fs::exists(path)) { + EXPECT(fs::is_directory(path), + "%s must be a directory.", + path.c_str()); + } else { + EXPECT(fs::create_directories(path), + "Failed to create directory: %s", + path.c_str()); + } + + for (auto x = 0; x < n_x; ++x) { + auto& file = files[x]; + auto file_path = path / std::to_string(x); + + EXPECT(file_create(&file, + file_path.string().c_str(), + file_path.string().size()), + "Failed to open file: '%s'", + file_path.c_str()); + } + + success = true; + } catch (const std::exception& exc) { + char buf[128]; + snprintf(buf, + sizeof(buf), + "Failed to create directory: %s", + exc.what()); + err = buf; + } catch (...) 
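The `FileCreator::create()` hunk above builds the on-disk hierarchy one level per queued job: a directory per channel under the base path, a directory per chunk/shard row, and one file per column. A minimal, synchronous sketch of the same `base/<c>/<y>/<x>` layout (the helper name and the use of `std::ofstream` are ours for illustration; the patch does this asynchronously on the Zarr job queue and opens platform file handles instead):

```
#include <filesystem>
#include <fstream>
#include <string>

namespace fs = std::filesystem;

// Sketch: create base/<c>/<y>/<x> for an n_c x n_y x n_x grid of chunk files.
void create_grid(const fs::path& base, int n_c, int n_y, int n_x)
{
    for (int c = 0; c < n_c; ++c) {
        for (int y = 0; y < n_y; ++y) {
            const fs::path dir = base / std::to_string(c) / std::to_string(y);
            fs::create_directories(dir);
            for (int x = 0; x < n_x; ++x) {
                std::ofstream{ dir / std::to_string(x) }; // touch the chunk file
            }
        }
    }
}
```

Splitting the work into a per-channel job and per-row jobs lets directory creation overlap with file creation, while the per-channel mutex and the `finished` flags keep the ordering safe.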
{ + err = "Failed to create directory (unknown)"; + } + + *done = 1; + return success; + }); + } + } + + while (!std::all_of( + finished.begin(), finished.end(), [](const auto& b) { return b != 0; })) { + std::this_thread::sleep_for(500us); + } + + return std::all_of( + finished.begin(), finished.end(), [](const auto& b) { return b == 1; }); +} + +/// Writer +zarr::Writer::Writer(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr) + : frame_dims_{ frame_dims } + , tile_dims_{ tile_dims } + , data_root_{ data_root } + , frames_per_chunk_{ frames_per_chunk } + , frames_written_{ 0 } + , bytes_to_flush_{ 0 } + , current_chunk_{ 0 } + , pixel_type_{ SampleTypeCount } + , buffers_ready_{ nullptr } + , zarr_{ zarr } + , file_creator_{ zarr } +{ + CHECK(tile_dims_.cols > 0); + CHECK(tile_dims_.rows > 0); + EXPECT(tile_dims_ <= frame_dims_, + "Expected tile dimensions to be less than or equal to frame " + "dimensions."); + + tiles_per_frame_y_ = + std::ceil((float)frame_dims.rows / (float)tile_dims.rows); + tiles_per_frame_x_ = + std::ceil((float)frame_dims.cols / (float)tile_dims.cols); + + CHECK(frames_per_chunk_ > 0); + CHECK(!data_root_.empty()); + + if (!fs::is_directory(data_root)) { + std::error_code ec; + EXPECT(fs::create_directories(data_root_, ec), + "Failed to create data root directory: %s", + ec.message().c_str()); + } +} + +zarr::Writer::Writer(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr, + const BloscCompressionParams& compression_params) + : Writer(frame_dims, tile_dims, frames_per_chunk, data_root, zarr) +{ + blosc_compression_params_ = compression_params; +} + +zarr::Writer::~Writer() +{ + delete[] buffers_ready_; +} + +void +zarr::Writer::finalize() noexcept +{ + using namespace std::chrono_literals; + finalize_chunks_(); + if (bytes_to_flush_ > 0) { + flush_(); + } + + close_files_(); +} + +uint32_t +zarr::Writer::frames_written() const noexcept +{ + return frames_written_; +} + +bool +zarr::Writer::validate_frame_(const VideoFrame* frame) noexcept +{ + try { + CHECK(frame); + + if (pixel_type_ == SampleTypeCount) { + pixel_type_ = frame->shape.type; + } else { + EXPECT(pixel_type_ == frame->shape.type, + "Expected frame to have pixel type %s. Got %s.", + common::sample_type_to_string(pixel_type_), + common::sample_type_to_string(frame->shape.type)); + } + + // validate the incoming frame shape against the stored frame dims + EXPECT(frame_dims_.cols == frame->shape.dims.width, + "Expected frame to have %d columns. Got %d.", + frame_dims_.cols, + frame->shape.dims.width); + EXPECT(frame_dims_.rows == frame->shape.dims.height, + "Expected frame to have %d rows. Got %d.", + frame_dims_.rows, + frame->shape.dims.height); + + return true; + } catch (const std::exception& exc) { + char buf[128]; + snprintf(buf, sizeof(buf), "Invalid frame: %s", exc.what()); + zarr_->set_error(buf); + } catch (...) 
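The `Writer` constructor above sizes the tile grid by rounding the frame extent up to whole tiles with `std::ceil` on floats. For positive integer sizes the same result can be had with integer arithmetic; a small sketch (the helper name and the example values are ours):

```
#include <cstdint>

// Ceiling division: how many tiles of tile_px cover frame_px pixels.
constexpr uint32_t tiles_along(uint32_t frame_px, uint32_t tile_px)
{
    return (frame_px + tile_px - 1) / tile_px;
}

// e.g. a 1920 x 1080 frame with 512 x 512 tiles gives a 4 x 3 tile grid
static_assert(tiles_along(1920, 512) == 4);
static_assert(tiles_along(1080, 512) == 3);
```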
{ + char buf[32]; + snprintf(buf, sizeof(buf), "Invalid frame (unknown)"); + zarr_->set_error(buf); + } + return false; +} + +void +zarr::Writer::finalize_chunks_() noexcept +{ + using namespace std::chrono_literals; + + const auto frames_this_chunk = frames_written_ % frames_per_chunk_; + + // don't write zeros if we have written less than one full chunk or if + // the last frame written was the final frame in its chunk + if (frames_written_ < frames_per_chunk_ || frames_this_chunk == 0) { + return; + } + const auto bytes_per_frame = + frame_dims_.rows * frame_dims_.cols * common::bytes_of_type(pixel_type_); + const auto frames_to_write = frames_per_chunk_ - frames_this_chunk; + + bytes_to_flush_ += frames_to_write * bytes_per_frame; +} + +std::vector +zarr::Writer::compress_buffers_() noexcept +{ + const auto nchunks = tiles_per_frame_(); + + const size_t bytes_per_chunk = bytes_to_flush_ / nchunks; + std::vector buf_sizes; + if (!blosc_compression_params_.has_value()) { + for (auto& buf : chunk_buffers_) { + buf_sizes.push_back(std::min(bytes_per_chunk, buf.size())); + } + return buf_sizes; + } + using namespace std::chrono_literals; + + buf_sizes.resize(nchunks); + std::fill(buffers_ready_, buffers_ready_ + nchunks, false); + + TRACE("Compressing"); + + const auto bytes_of_type = common::bytes_of_type(pixel_type_); + + std::scoped_lock lock(mutex_); + for (auto i = 0; i < chunk_buffers_.size(); ++i) { + auto& buf = chunk_buffers_.at(i); + + zarr_->push_to_job_queue([params = blosc_compression_params_.value(), + buf = &buf, + bytes_of_type, + bytes_per_chunk, + finished = buffers_ready_ + i, + buf_size = buf_sizes.data() + + i](std::string& err) -> bool { + bool success = false; + try { + const auto tmp_size = bytes_per_chunk + BLOSC_MAX_OVERHEAD; + std::vector tmp(tmp_size); + const auto nb = + blosc_compress_ctx(params.clevel, + params.shuffle, + bytes_of_type, + bytes_per_chunk, + buf->data(), + tmp.data(), + tmp_size, + params.codec_id.c_str(), + 0 /* blocksize - 0:automatic */, + 1); + if (nb > buf->size()) { + buf->resize(nb); + } + memcpy(buf->data(), tmp.data(), nb); + *buf_size = nb; + + success = true; + } catch (const std::exception& exc) { + char msg[128]; + snprintf( + msg, sizeof(msg), "Failed to compress chunk: %s", exc.what()); + err = msg; + } catch (...) 
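The compression job above hands each chunk buffer to c-blosc's `blosc_compress_ctx()`, compressing into a scratch buffer padded by `BLOSC_MAX_OVERHEAD` and copying the result back. A standalone sketch of that call (the helper name, codec, level, and shuffle setting are illustrative, not the patch's configured values):

```
#include <blosc.h>

#include <cstdint>
#include <vector>

// Compress `src`, interpreted as bytes of `typesize`-byte samples.
// Returns the compressed size, or 0 / a negative value if compression fails
// or does not fit in the destination buffer.
int compress_chunk(const std::vector<uint8_t>& src, size_t typesize)
{
    std::vector<uint8_t> scratch(src.size() + BLOSC_MAX_OVERHEAD);
    return blosc_compress_ctx(1,             // clevel
                              BLOSC_SHUFFLE, // shuffle
                              typesize,
                              src.size(),
                              src.data(),
                              scratch.data(),
                              scratch.size(),
                              "zstd",        // codec id, e.g. "zstd" or "lz4"
                              0,             // blocksize: 0 = automatic
                              1);            // internal threads
}
```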
{ + err = "Failed to compress chunk (unknown)"; + } + *finished = true; + + return success; + }); + } + + // wait for all threads to finish + while (!std::all_of(buffers_ready_, + buffers_ready_ + nchunks, + [](const auto& b) { return b; })) { + std::this_thread::sleep_for(500us); + } + + return buf_sizes; +} + +uint32_t +zarr::Writer::tiles_per_frame_() const +{ + return (uint32_t)tiles_per_frame_x_ * (uint32_t)tiles_per_frame_y_; +} + +void +zarr::Writer::close_files_() +{ + using namespace std::chrono_literals; + while (0 < zarr_->jobs_on_queue()) { + std::this_thread::sleep_for(2ms); + } + + for (auto& file : files_) { + file_close(&file); + } + files_.clear(); +} + +void +zarr::Writer::rollover_() +{ + TRACE("Rolling over"); + + close_files_(); + ++current_chunk_; +} diff --git a/src/writers/writer.hh b/src/writers/writer.hh new file mode 100644 index 00000000..6caa4e78 --- /dev/null +++ b/src/writers/writer.hh @@ -0,0 +1,116 @@ +#ifndef H_ACQUIRE_ZARR_WRITER_V0 +#define H_ACQUIRE_ZARR_WRITER_V0 + +#ifndef __cplusplus +#error "This header requires C++20" +#endif + +#include "platform.h" +#include "device/props/components.h" + +#include "../common.hh" +#include "blosc.compressor.hh" + +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +namespace acquire::sink::zarr { +struct Zarr; + +struct FileCreator +{ + FileCreator() = delete; + explicit FileCreator(Zarr* zarr); + ~FileCreator() noexcept = default; + + void set_base_dir(const fs::path& base_dir) noexcept; + [[nodiscard]] bool create(int n_c, + int n_y, + int n_x, + std::vector& files) noexcept; + + private: + fs::path base_dir_; + + Zarr* zarr_; +}; + +struct Writer +{ + public: + Writer() = delete; + Writer(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr); + + /// Constructor with Blosc compression params + Writer(const ImageDims& frame_dims, + const ImageDims& tile_dims, + uint32_t frames_per_chunk, + const std::string& data_root, + Zarr* zarr, + const BloscCompressionParams& compression_params); + virtual ~Writer(); + + [[nodiscard]] virtual bool write(const VideoFrame* frame) noexcept = 0; + void finalize() noexcept; + + uint32_t frames_written() const noexcept; + + protected: + /// Tiling/chunking + ImageDims frame_dims_; + ImageDims tile_dims_; + uint16_t tiles_per_frame_x_; + uint16_t tiles_per_frame_y_; + SampleType pixel_type_; + uint32_t frames_per_chunk_; + + /// Compression + std::optional blosc_compression_params_; + // std::optional zstd_compression_params_; // TODO + + /// Filesystem + FileCreator file_creator_; + fs::path data_root_; + std::vector files_; + + /// Multithreading + std::vector> chunk_buffers_; + bool* buffers_ready_; + std::mutex mutex_; + + /// Bookkeeping + uint64_t bytes_to_flush_; + uint32_t frames_written_; + uint32_t current_chunk_; + Zarr* zarr_; + + [[nodiscard]] bool validate_frame_(const VideoFrame* frame) noexcept; + + virtual void make_buffers_() noexcept = 0; + + void finalize_chunks_() noexcept; + std::vector compress_buffers_() noexcept; + virtual size_t write_bytes_(const uint8_t* buf, + size_t buf_size) noexcept = 0; + virtual void flush_() noexcept = 0; + + uint32_t tiles_per_frame_() const; + + /// Files + [[nodiscard]] virtual bool make_files_() noexcept = 0; + void close_files_(); + void rollover_(); +}; +} // namespace acquire::sink::zarr + +#endif // H_ACQUIRE_ZARR_WRITER_V0 diff --git a/src/zarr.blosc.cpp b/src/zarr.blosc.cpp deleted file mode 
100644 index a7dd54d7..00000000 --- a/src/zarr.blosc.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include "zarr.blosc.hh" -#include "zarr.hh" - -#include "logger.h" - -#include -#include - -namespace zarr = acquire::sink::zarr; -using json = nlohmann::json; - -namespace { -template -struct Storage* -compressed_zarr_init() -{ - try { - zarr::CompressionParams params( - zarr::compression_codec_as_string(), 1, 1); - return new zarr::Zarr(std::move(params)); - } catch (const std::exception& exc) { - LOGE("Exception: %s\n", exc.what()); - } catch (...) { - LOGE("Exception: (unknown)"); - } - return nullptr; -} -} // end ::{anonymous} namespace - -// -// zarr namespace implementations -// - -void -zarr::to_json(json& j, const zarr::CompressionParams& bc) -{ - j = json{ { "id", std::string(bc.id_) }, - { "cname", bc.codec_id_ }, - { "clevel", bc.clevel_ }, - { "shuffle", bc.shuffle_ } }; -} - -void -zarr::from_json(const json& j, zarr::CompressionParams& bc) -{ - j.at("cname").get_to(bc.codec_id_); - j.at("clevel").get_to(bc.clevel_); - j.at("shuffle").get_to(bc.shuffle_); -} - -zarr::BloscEncoder::BloscEncoder(const CompressionParams& compressor) - : compressor_{ compressor } -{ -} - -zarr::BloscEncoder::~BloscEncoder() noexcept -{ - try { - flush(); - } catch (const std::exception& exc) { - LOGE("Exception: %s\n", exc.what()); - } catch (...) { - LOGE("Exception: (unknown)"); - } -} - -size_t -zarr::BloscEncoder::flush_impl() -{ - auto* buf_c = new uint8_t[cursor_ + BLOSC_MAX_OVERHEAD]; - CHECK(buf_c); - - const auto nbytes_out = - (size_t)blosc_compress_ctx(compressor_.clevel_, - compressor_.shuffle_, - bytes_per_pixel_, - cursor_, - buf_.data(), - buf_c, - cursor_ + BLOSC_MAX_OVERHEAD, - compressor_.codec_id_.c_str(), - 0 /* blocksize - 0:automatic */, - (int)std::thread::hardware_concurrency()); - - CHECK(file_write(file_, 0, buf_c, buf_c + nbytes_out)); - - delete[] buf_c; - return nbytes_out; -} - -extern "C" struct Storage* -compressed_zarr_zstd_init() -{ - return compressed_zarr_init(); -} - -extern "C" struct Storage* -compressed_zarr_lz4_init() -{ - return compressed_zarr_init(); -} diff --git a/src/zarr.blosc.hh b/src/zarr.blosc.hh deleted file mode 100644 index ad77f770..00000000 --- a/src/zarr.blosc.hh +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef H_ACQUIRE_STORAGE_ZARR_BLOSC_V0 -#define H_ACQUIRE_STORAGE_ZARR_BLOSC_V0 - -#ifdef __cplusplus - -#include "zarr.encoder.hh" - -#include "blosc.h" - -namespace acquire::sink::zarr { - -struct CompressionParams -{ - static constexpr char id_[] = "blosc"; - std::string codec_id_; - int clevel_; - int shuffle_; - - CompressionParams(); - CompressionParams(const std::string& codec_id, int clevel, int shuffle); -}; - -void -to_json(nlohmann::json&, const CompressionParams&); - -void -from_json(const nlohmann::json&, CompressionParams&); - -enum class BloscCodecId -{ - Lz4 = 0, - Zstd, -}; - -template -constexpr const char* -compression_codec_as_string(); - -template<> -constexpr const char* -compression_codec_as_string() -{ - return "zstd"; -} - -template<> -constexpr const char* -compression_codec_as_string() -{ - return "lz4"; -} - -struct BloscEncoder final : public BaseEncoder -{ - public: - explicit BloscEncoder(const CompressionParams& compressor); - ~BloscEncoder() noexcept override; - - private: - size_t flush_impl() override; - - CompressionParams compressor_; -}; -} // namespace acquire::sink::zarr - -#endif // __cplusplus -#endif // H_ACQUIRE_STORAGE_ZARR_BLOSC_V0 \ No newline at end of file diff --git a/src/zarr.cpp b/src/zarr.cpp index 
8aa124e4..1f91c5a2 100644 --- a/src/zarr.cpp +++ b/src/zarr.cpp @@ -1,52 +1,83 @@ #include "zarr.hh" -#include "device/kit/storage.h" -#include "logger.h" -#include "platform.h" -#include "zarr.raw.hh" - -#include -#include -#include -#include - +#include "writers/chunk.writer.hh" #include "json.hpp" -namespace fs = std::filesystem; namespace zarr = acquire::sink::zarr; - -// -// Private namespace -// +using json = nlohmann::json; namespace { +/// \brief Check that the JSON string is valid. (Valid can mean empty.) +/// \param str Putative JSON metadata string. +/// \param nbytes Size of the JSON metadata char array +void +validate_json(const char* str, size_t nbytes) +{ + // Empty strings are valid (no metadata is fine). + if (nbytes <= 1 || nullptr == str) { + return; + } -// Forward declarations + // Don't do full json validation here, but make sure it at least + // begins and ends with '{' and '}' + EXPECT(nbytes >= 3, + "nbytes (%d) is too small. Expected a null-terminated json string.", + (int)nbytes); + EXPECT(str[nbytes - 1] == '\0', "String must be null-terminated"); + EXPECT(str[0] == '{', "json string must start with \'{\'"); + EXPECT(str[nbytes - 2] == '}', "json string must end with \'}\'"); +} -DeviceState -zarr_set(Storage*, const StorageProperties* props) noexcept; +/// \brief Get the filename from a StorageProperties as fs::path. +/// \param props StorageProperties for the Zarr Storage device. +/// \return fs::path representation of the Zarr data directory. +fs::path +as_path(const StorageProperties& props) +{ + return { props.filename.str, + props.filename.str + props.filename.nbytes - 1 }; +} +/// \brief Check that the StorageProperties are valid. +/// \details Assumes either an empty or valid JSON metadata string and a +/// filename string that points to a writable directory. \param props Storage +/// properties for Zarr. \throw std::runtime_error if the parent of the Zarr +/// data directory is not an existing directory. 
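The `validate_json()` helper above deliberately stops short of parsing: empty metadata is accepted, and otherwise the string only has to be null-terminated, start with `{`, and end with `}`. A condensed sketch of that shape check (the helper name is ours):

```
#include <cstddef>
#include <stdexcept>

// Accept empty metadata; otherwise require "{...}" followed by a terminating NUL.
void check_metadata_shape(const char* str, size_t nbytes)
{
    if (nbytes <= 1 || str == nullptr)
        return; // no metadata is fine
    if (nbytes < 3 || str[nbytes - 1] != '\0' || str[0] != '{' ||
        str[nbytes - 2] != '}')
        throw std::runtime_error("Expected a null-terminated JSON object.");
}
```

This keeps configuration cheap while still rejecting obviously malformed metadata early.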
void -zarr_get(const Storage*, StorageProperties* props) noexcept; +validate_props(const StorageProperties* props) +{ + EXPECT(props->filename.str, "Filename string is NULL."); + EXPECT(props->filename.nbytes, "Filename string is zero size."); -void -zarr_get_meta(const Storage*, StoragePropertyMetadata* meta) noexcept; + // check that JSON is correct (throw std::exception if not) + validate_json(props->external_metadata_json.str, + props->external_metadata_json.nbytes); -DeviceState -zarr_start(Storage*) noexcept; + // check that the filename value points to a writable directory + { -DeviceState -zarr_append(Storage* self_, const VideoFrame* frame, size_t* nbytes) noexcept; + auto path = as_path(*props); + auto parent_path = path.parent_path().string(); + if (parent_path.empty()) + parent_path = "."; -DeviceState -zarr_stop(Storage*) noexcept; + EXPECT(fs::is_directory(parent_path), + "Expected \"%s\" to be a directory.", + parent_path.c_str()); -void -zarr_destroy(Storage*) noexcept; + // check directory is writable + EXPECT(fs::is_directory(parent_path), + "Expected \"%s\" to be a directory.", + parent_path.c_str()); -// -// STORAGE C API IMPLEMENTATIONS -// + const auto perms = fs::status(fs::path(parent_path)).permissions(); + + EXPECT((perms & (fs::perms::owner_write | fs::perms::group_write | + fs::perms::others_write)) != fs::perms::none, + "Expected \"%s\" to have write permissions.", + parent_path.c_str()); + } +} DeviceState zarr_set(Storage* self_, const StorageProperties* props) noexcept @@ -184,49 +215,129 @@ zarr_reserve_image_shape(Storage* self_, const ImageShape* shape) noexcept LOGE("Exception: (unknown)"); } } -} // end namespace ::{anonymous} - -// -// zarr namespace implementations -// -zarr::Zarr::Zarr() - : dimension_separator_{ '/' } - , frame_count_{ 0 } - , pixel_scale_um_{ 1, 1 } - , max_bytes_per_chunk_{ 0 } - , image_shape_{ 0 } - , tile_shape_{ 0 } - , thread_pool_(std::thread::hardware_concurrency()) +void +make_scales(std::vector>& shapes) { - start_threads_(); + CHECK(shapes.size() == 1); + const auto base_image_shape = shapes.at(0).first; + const auto base_tile_shape = shapes.at(0).second; + + const int downscale = 2; + + uint32_t w = base_image_shape.cols; + uint32_t h = base_image_shape.rows; + + while (w > base_tile_shape.cols || h > base_tile_shape.rows) { + w = (w + (w % downscale)) / downscale; + h = (h + (h % downscale)) / downscale; + + zarr::ImageDims im_shape = base_image_shape; + im_shape.cols = w; + im_shape.rows = h; + + zarr::ImageDims tile_shape = base_tile_shape; + if (tile_shape.cols > w) + tile_shape.cols = w; + + if (tile_shape.rows > h) + tile_shape.rows = h; + + shapes.emplace_back(im_shape, tile_shape); + } } -zarr::Zarr::Zarr(CompressionParams&& compression_params) - : dimension_separator_{ '/' } - , frame_count_{ 0 } - , pixel_scale_um_{ 1, 1 } - , max_bytes_per_chunk_{ 0 } - , image_shape_{ 0 } - , tile_shape_{ 0 } - , thread_pool_(std::thread::hardware_concurrency()) +template +VideoFrame* +scale_image(const VideoFrame* src) { - compression_params_ = std::move(compression_params); - start_threads_(); + CHECK(src); + const int downscale = 2; + constexpr size_t bytes_of_type = sizeof(T); + const auto factor = 0.25f; + + const auto width = src->shape.dims.width; + const auto w_pad = width + (width % downscale); + + const auto height = src->shape.dims.height; + const auto h_pad = height + (height % downscale); + + auto* dst = (VideoFrame*)malloc(sizeof(VideoFrame) + + w_pad * h_pad * factor * sizeof(T)); + memcpy(dst, src, 
sizeof(VideoFrame)); + + dst->shape.dims.width = w_pad / downscale; + dst->shape.dims.height = h_pad / downscale; + dst->shape.strides.height = + dst->shape.strides.width * dst->shape.dims.width; + dst->shape.strides.planes = + dst->shape.strides.height * dst->shape.dims.height; + + dst->bytes_of_frame = + dst->shape.dims.planes * dst->shape.strides.planes * sizeof(T) + + sizeof(*dst); + + const auto* src_img = (T*)src->data; + auto* dst_img = (T*)dst->data; + memset(dst_img, 0, dst->bytes_of_frame - sizeof(*dst)); + + size_t dst_idx = 0; + for (auto row = 0; row < height; row += downscale) { + const bool pad_height = (row == height - 1 && height != h_pad); + + for (auto col = 0; col < width; col += downscale) { + const bool pad_width = (col == width - 1 && width != w_pad); + + size_t idx = row * width + col; + dst_img[dst_idx++] = + (T)(factor * + ((float)src_img[idx] + + (float)src_img[idx + (1 - (int)pad_width)] + + (float)src_img[idx + width * (1 - (int)pad_height)] + + (float)src_img[idx + width * (1 - (int)pad_height) + + (1 - (int)pad_width)])); + } + } + + return dst; } -zarr::Zarr::~Zarr() +/// @brief Average both `dst` and `src` into `dst`. +template +void +average_two_frames(VideoFrame* dst, const VideoFrame* src) { - if (!stop()) { - LOGE("Failed to stop on destruct!"); + CHECK(dst); + CHECK(src); + CHECK(dst->bytes_of_frame == src->bytes_of_frame); + + const auto bytes_of_image = dst->bytes_of_frame - sizeof(*dst); + const auto num_pixels = bytes_of_image / sizeof(T); + for (auto i = 0; i < num_pixels; ++i) { + dst->data[i] = (T)(((float)dst->data[i] + (float)src->data[i]) / 2.0f); } - recover_threads_(); +} +} // end ::{anonymous} namespace + +/// StorageInterface +zarr::StorageInterface::StorageInterface() + : Storage{ + .state = DeviceState_AwaitingConfiguration, + .set = ::zarr_set, + .get = ::zarr_get, + .get_meta = ::zarr_get_meta, + .start = ::zarr_start, + .append = ::zarr_append, + .stop = ::zarr_stop, + .destroy = ::zarr_destroy, + .reserve_image_shape = ::zarr_reserve_image_shape, + } +{ } void zarr::Zarr::set(const StorageProperties* props) { - using namespace acquire::sink::zarr; CHECK(props); StoragePropertyMetadata meta{}; @@ -234,7 +345,7 @@ zarr::Zarr::set(const StorageProperties* props) // checks the directory exists and is writable validate_props(props); - data_dir_ = as_path(*props).string(); + dataset_root_ = as_path(*props); if (props->external_metadata_json.str) external_metadata_json_ = props->external_metadata_json.str; @@ -242,56 +353,56 @@ zarr::Zarr::set(const StorageProperties* props) pixel_scale_um_ = props->pixel_scale_um; // chunking + image_tile_shapes_.clear(); + image_tile_shapes_.emplace_back(); + set_chunking(props->chunking, meta.chunking); - // hang on to this until we have the image shape - enable_multiscale_ = (bool)props->enable_multiscale; + if (props->enable_multiscale && !meta.multiscale.supported) { + // TODO (aliddell): https://github.com/ome/ngff/pull/206 + LOGE("OME-Zarr multiscale not yet supported in Zarr v3. 
" + "Multiscale arrays will not be written."); + } + enable_multiscale_ = meta.multiscale.supported && props->enable_multiscale; } void zarr::Zarr::get(StorageProperties* props) const { CHECK(storage_properties_set_filename( - props, data_dir_.c_str(), data_dir_.size())); + props, dataset_root_.string().c_str(), dataset_root_.string().size())); CHECK(storage_properties_set_external_metadata( props, external_metadata_json_.c_str(), external_metadata_json_.size())); props->pixel_scale_um = pixel_scale_um_; - props->chunking.tile.width = tile_shape_.width; - props->chunking.tile.height = tile_shape_.height; - props->chunking.tile.planes = tile_shape_.planes; + if (!image_tile_shapes_.empty()) { + props->chunking.tile.width = image_tile_shapes_.at(0).second.cols; + props->chunking.tile.height = image_tile_shapes_.at(0).second.rows; + } + props->chunking.tile.planes = 1; props->enable_multiscale = enable_multiscale_; } -void -zarr::Zarr::get_meta(StoragePropertyMetadata* meta) const -{ - CHECK(meta); - *meta = { - .chunking = { - .supported = 1, - .max_bytes_per_chunk = { - .writable = 1, - .low = (float)(16 << 20), - .high = (float)(1 << 30), - .type = PropertyType_FixedPrecision }, - }, - .multiscale = { - .supported = 1, - } - }; -} - void zarr::Zarr::start() { - frame_count_ = 0; - create_data_directory_(); - write_zgroup_json_(); - write_group_zattrs_json_(); - write_zarray_json_(); - write_external_metadata_json_(); + error_ = true; + if (fs::exists(dataset_root_)) { + std::error_code ec; + EXPECT(fs::remove_all(dataset_root_, ec), + R"(Failed to remove folder for "%s": %s)", + dataset_root_.c_str(), + ec.message().c_str()); + } + fs::create_directories(dataset_root_); + + write_base_metadata_(); + write_group_metadata_(); + write_all_array_metadata_(); + write_external_metadata_(); + + error_ = false; } int @@ -304,15 +415,14 @@ zarr::Zarr::stop() noexcept is_ok = 0; try { - while (!job_queue_.empty()) { - TRACE("Cycling: %llu jobs remaining", job_queue_.size()); - clock_sleep_ms(nullptr, 50.0); + write_all_array_metadata_(); // must precede close of chunk file + write_group_metadata_(); + + for (auto& writer : writers_) { + writer->finalize(); } - recover_threads_(); - write_zarray_json_(); // must precede close of chunk file - write_group_zattrs_json_(); // write multiscales metadata + writers_.clear(); is_ok = 1; - frame_count_ = 0; } catch (const std::exception& exc) { LOGE("Exception: %s\n", exc.what()); } catch (...) 
{ @@ -326,15 +436,11 @@ zarr::Zarr::stop() noexcept size_t zarr::Zarr::append(const VideoFrame* frames, size_t nbytes) { - if (0 == nbytes) - return nbytes; - - // validate start conditions - if (0 == frame_count_) { - validate_image_and_tile_shapes_(); - } // TODO (aliddell): make this a function + EXPECT(!error_, "%s", error_msg_.c_str()); - using namespace acquire::sink::zarr; + if (0 == nbytes) { + return nbytes; + } const VideoFrame* cur = nullptr; const auto* end = (const VideoFrame*)((uint8_t*)frames + nbytes); @@ -344,78 +450,64 @@ zarr::Zarr::append(const VideoFrame* frames, size_t nbytes) }; for (cur = frames; cur < end; cur = next()) { - // handle incoming image shape - validate_image_shapes_equal(image_shape_, cur->shape); - - // create a new frame - auto frame = - std::make_shared(cur, image_shape_, tile_shape_); + EXPECT(writers_.at(0)->write(cur), "%s", error_msg_.c_str()); - if (frame_scaler_) { - std::scoped_lock lock(job_queue_mutex_); - - // push the new frame to our scaler - job_queue_.emplace( - [this, frame]() { return frame_scaler_->push_frame(frame); }); - } else { - push_frame_to_writers(frame); + // multiscale + if (writers_.size() > 1) { + write_multiscale_frames_(cur); } - - ++frame_count_; } - return nbytes; } void zarr::Zarr::reserve_image_shape(const ImageShape* shape) { - // `shape` should be verified nonnull in storage_reserve_image_shape, but let's check anyway + // `shape` should be verified nonnull in storage_reserve_image_shape, but + // let's check anyway CHECK(shape); - image_shape_ = *shape; + image_tile_shapes_.at(0).first = { + .cols = shape->dims.width, + .rows = shape->dims.height, + }; + pixel_type_ = shape->type; + + ImageDims& image_shape = image_tile_shapes_.at(0).first; + ImageDims& tile_shape = image_tile_shapes_.at(0).second; // ensure that tile dimensions are compatible with the image shape { StorageProperties props = { 0 }; get(&props); - uint32_t tile_width = props.chunking.tile.width; - if (image_shape_.dims.width > 0 && - (tile_width == 0 || tile_width > image_shape_.dims.width)) { + if (image_shape.cols > 0 && + (tile_width == 0 || tile_width > image_shape.cols)) { LOGE("%s. Setting width to %u.", tile_width == 0 ? "Tile width not specified" : "Specified tile width is too large", - image_shape_.dims.width); - tile_width = image_shape_.dims.width; + image_shape.cols); + tile_width = image_shape.cols; } - tile_shape_.width = tile_width; + tile_shape.cols = tile_width; uint32_t tile_height = props.chunking.tile.height; - if (image_shape_.dims.height > 0 && - (tile_height == 0 || tile_height > image_shape_.dims.height)) { + if (image_shape.rows > 0 && + (tile_height == 0 || tile_height > image_shape.rows)) { LOGE("%s. Setting height to %u.", tile_height == 0 ? "Tile height not specified" : "Specified tile height is too large", - image_shape_.dims.height); - tile_height = image_shape_.dims.height; - } - tile_shape_.height = tile_height; - - uint32_t tile_planes = props.chunking.tile.planes; - if (image_shape_.dims.planes > 0 && - (tile_planes == 0 || tile_planes > image_shape_.dims.planes)) { - LOGE("%s. Setting planes to %u.", - tile_planes == 0 ? 
"Tile plane count not specified" - : "Specified tile plane count is too large", - image_shape_.dims.planes); - tile_planes = image_shape_.dims.planes; + image_shape.rows); + tile_height = image_shape.rows; } - tile_shape_.planes = tile_planes; + tile_shape.rows = tile_height; + storage_properties_destroy(&props); } // ensure that the chunk size can accommodate at least one tile - uint64_t bytes_per_tile = get_bytes_per_tile(image_shape_, tile_shape_); + uint64_t bytes_per_tile = common::bytes_per_tile(tile_shape, pixel_type_); + CHECK(bytes_per_tile > 0); + if (max_bytes_per_chunk_ < bytes_per_tile) { LOGE("Specified chunk size %llu is too small. Setting to %llu bytes.", max_bytes_per_chunk_, @@ -424,619 +516,275 @@ zarr::Zarr::reserve_image_shape(const ImageShape* shape) } if (enable_multiscale_) { - frame_scaler_.emplace(this, image_shape_, tile_shape_); + make_scales(image_tile_shapes_); } allocate_writers_(); -} -void -zarr::Zarr::push_frame_to_writers(const std::shared_ptr frame) -{ - std::scoped_lock lock(job_queue_mutex_); - auto writer = writers_.at(frame->layer()); - - for (auto& w : writer) { - job_queue_.emplace([w, frame]() { return w->write_frame(*frame); }); + // multiscale + for (auto i = 1; i < writers_.size(); ++i) { + scaled_frames_.insert_or_assign(i, std::nullopt); } } -std::optional -zarr::Zarr::pop_from_job_queue() -{ - std::scoped_lock lock(job_queue_mutex_); - if (job_queue_.empty()) - return {}; - - auto job = job_queue_.front(); - job_queue_.pop(); - - return { job }; -} - -void -zarr::Zarr::set_chunking(const ChunkingProps& props, const ChunkingMeta& meta) -{ - max_bytes_per_chunk_ = std::clamp(props.max_bytes_per_chunk, - (uint64_t)meta.max_bytes_per_chunk.low, - (uint64_t)meta.max_bytes_per_chunk.high); +/// Zarr - tile_shape_ = { - .width = props.tile.width, - .height = props.tile.height, - .planes = props.tile.planes, - }; -} - -void -zarr::Zarr::create_data_directory_() const +zarr::Zarr::Zarr() + : threads_(std::thread::hardware_concurrency()) { - namespace fs = std::filesystem; - if (fs::exists(data_dir_)) { - std::error_code ec; - EXPECT(fs::remove_all(data_dir_, ec), - R"(Failed to remove folder for "%s": %s)", - data_dir_.c_str(), - ec.message().c_str()); + // spin up threads + for (auto& ctx_ : threads_) { + ctx_.ready = true; + ctx_.should_stop = false; + ctx_.thread = std::thread([this, ctx = &ctx_] { worker_thread_(ctx); }); } - - EXPECT(fs::create_directory(data_dir_), - "Failed to create folder for \"%s\"", - data_dir_.c_str()); } -void -zarr::Zarr::write_zarray_json_() const +zarr::Zarr::Zarr(BloscCompressionParams&& compression_params) + : Zarr() { - namespace fs = std::filesystem; - using json = nlohmann::json; - - if (writers_.empty()) { - write_zarray_json_inner_(0, image_shape_, tile_shape_); - } else { - for (const auto& [layer, writers] : writers_) { - auto writer = writers.front(); - const auto& is = writer->image_shape(); - const auto& ts = writer->tile_shape(); - write_zarray_json_inner_(layer, is, ts); - } - } + blosc_compression_params_ = std::move(compression_params); } -void -zarr::Zarr::write_zarray_json_inner_(size_t layer, - const ImageShape& image_shape, - const TileShape& tile_shape) const +zarr::Zarr::~Zarr() noexcept { - namespace fs = std::filesystem; - using json = nlohmann::json; - - if (!writers_.contains(layer)) { - return; + // spin down threads + for (auto& ctx : threads_) { + ctx.should_stop = true; + ctx.cv.notify_one(); + ctx.thread.join(); } - - const uint64_t frame_count = 
writers_.at(layer).front()->frames_written(); - const auto frames_per_chunk = - std::min(frame_count, - (uint64_t)get_tiles_per_chunk( - image_shape, tile_shape, max_bytes_per_chunk_)); - - json zarray_attrs = { - { "zarr_format", 2 }, - { "shape", - { - frame_count, - image_shape.dims.channels, - image_shape.dims.height, - image_shape.dims.width, - } }, - { "chunks", - { - frames_per_chunk, - 1, - tile_shape.height, - tile_shape.width, - } }, - { "dtype", sample_type_to_dtype(image_shape.type) }, - { "fill_value", 0 }, - { "order", "C" }, - { "filters", nullptr }, - { "dimension_separator", std::string(1, dimension_separator_) }, - }; - - if (compression_params_.has_value()) - zarray_attrs["compressor"] = compression_params_.value(); - else - zarray_attrs["compressor"] = nullptr; - - std::string zarray_path = - (fs::path(data_dir_) / std::to_string(layer) / ".zarray").string(); - write_string(zarray_path, zarray_attrs.dump()); } void -zarr::Zarr::write_external_metadata_json_() const +zarr::Zarr::set_chunking(const ChunkingProps& props, const ChunkingMeta& meta) { - namespace fs = std::filesystem; - using json = nlohmann::json; - - std::string zattrs_path = (fs::path(data_dir_) / "0" / ".zattrs").string(); - write_string(zattrs_path, external_metadata_json_); -} + max_bytes_per_chunk_ = std::clamp(props.max_bytes_per_chunk, + (uint64_t)meta.max_bytes_per_chunk.low, + (uint64_t)meta.max_bytes_per_chunk.high); -void -zarr::Zarr::write_group_zattrs_json_() const -{ - namespace fs = std::filesystem; - using json = nlohmann::json; - - json zgroup_attrs; - zgroup_attrs["multiscales"] = json::array({ json::object() }); - zgroup_attrs["multiscales"][0]["version"] = "0.4"; - zgroup_attrs["multiscales"][0]["axes"] = { - { - { "name", "t" }, - { "type", "time" }, - }, - { - { "name", "c" }, - { "type", "channel" }, - }, - { - { "name", "y" }, - { "type", "space" }, - { "unit", "micrometer" }, - }, - { - { "name", "x" }, - { "type", "space" }, - { "unit", "micrometer" }, - }, + // image shape is set *after* this is set so we verify it later + image_tile_shapes_.at(0).second = { + .cols = props.tile.width, + .rows = props.tile.height, }; - - // spatial multiscale metadata - if (writers_.empty() || !frame_scaler_.has_value()) { - zgroup_attrs["multiscales"][0]["datasets"] = { - { - { "path", "0" }, - { "coordinateTransformations", - { - { - { "type", "scale" }, - { "scale", { 1, 1, pixel_scale_um_.y, pixel_scale_um_.x } }, - }, - } }, - }, - }; - } else { - for (const auto& [layer, _] : writers_) { - zgroup_attrs["multiscales"][0]["datasets"].push_back({ - { "path", std::to_string(layer) }, - { "coordinateTransformations", - { - { - { "type", "scale" }, - { "scale", - { std::pow(2, layer), - 1, - std::pow(2, layer) * pixel_scale_um_.y, - std::pow(2, layer) * pixel_scale_um_.x } }, - }, - } }, - }); - } - - // downsampling metadata - zgroup_attrs["multiscales"][0]["type"] = "local_mean"; - zgroup_attrs["multiscales"][0]["metadata"] = { - { "description", - "The fields in the metadata describe how to reproduce this " - "multiscaling in scikit-image. The method and its parameters are " - "given here." 
}, - { "method", "skimage.transform.downscale_local_mean" }, - { "version", "0.21.0" }, - { "args", "[2]" }, - { "kwargs", { "cval", 0 } }, - }; - } - - std::string zattrs_path = (fs::path(data_dir_) / ".zattrs").string(); - write_string(zattrs_path, zgroup_attrs.dump(4)); } void -zarr::Zarr::write_zgroup_json_() const +zarr::Zarr::set_error(const std::string& msg) noexcept { - namespace fs = std::filesystem; - using json = nlohmann::json; + std::scoped_lock lock(mutex_); - const json zgroup = { { "zarr_format", 2 } }; - std::string zgroup_path = (fs::path(data_dir_) / ".zgroup").string(); - write_string(zgroup_path, zgroup.dump()); -} - -void -zarr::Zarr::allocate_writers_() -{ - writers_.clear(); - - std::vector scaling_params; - if (frame_scaler_) { - scaling_params = make_scaling_parameters(image_shape_, tile_shape_); - } else { - scaling_params.emplace_back(image_shape_, tile_shape_); - } - - for (auto layer = 0; layer < scaling_params.size(); ++layer) { - auto multiscale = scaling_params.at(layer); - auto& image_shape = multiscale.image_shape; - auto& tile_shape = multiscale.tile_shape; - - CHECK(tile_shape.width > 0); - size_t img_px_x = image_shape.dims.channels * image_shape.dims.width; - size_t tile_cols = std::ceil((float)img_px_x / (float)tile_shape.width); - - size_t img_px_y = image_shape_.dims.height; - CHECK(tile_shape_.height > 0); - size_t tile_rows = - std::ceil((float)img_px_y / (float)tile_shape_.height); - - size_t img_px_p = image_shape_.dims.planes; - CHECK(tile_shape_.planes > 0); - size_t tile_planes = - std::ceil((float)img_px_p / (float)tile_shape_.planes); - - TRACE("Allocating %llu writers for layer %d", - tile_cols * tile_rows * tile_planes, - layer); - - size_t buf_size = - compression_params_.has_value() - ? get_bytes_per_chunk(image_shape, tile_shape, max_bytes_per_chunk_) - : get_bytes_per_tile(image_shape, tile_shape); - - writers_.emplace(layer, std::vector>()); - for (auto plane = 0; plane < tile_planes; ++plane) { - for (auto row = 0; row < tile_rows; ++row) { - for (auto col = 0; col < tile_cols; ++col) { - BaseEncoder* encoder; - if (compression_params_.has_value()) { - CHECK(encoder = - new BloscEncoder(compression_params_.value())); - } else { - CHECK(encoder = new RawEncoder()); - } - - encoder->allocate_buffer(buf_size); - encoder->set_bytes_per_pixel( - bytes_per_sample_type(image_shape_.type)); - writers_.at(layer).push_back( - std::make_shared(encoder, - image_shape, - tile_shape, - layer, - col, - row, - plane, - max_bytes_per_chunk_, - dimension_separator_, - data_dir_)); - } - } - } + // don't overwrite the first error + if (!error_) { + error_ = true; + error_msg_ = msg; } } void -zarr::Zarr::validate_image_and_tile_shapes_() const +zarr::Zarr::push_to_job_queue(JobT&& job) { - CHECK(image_shape_.dims.channels > 0); - CHECK(image_shape_.dims.width > 0); - CHECK(image_shape_.dims.height > 0); - CHECK(image_shape_.dims.planes > 0); - CHECK(tile_shape_.width > 0); - CHECK(tile_shape_.width <= image_shape_.dims.width); - CHECK(tile_shape_.height > 0); - CHECK(tile_shape_.height <= image_shape_.dims.height); - CHECK(tile_shape_.planes > 0); - CHECK(tile_shape_.planes <= image_shape_.dims.planes); + std::scoped_lock lock(mutex_); + jobs_.push(std::move(job)); } -void -zarr::Zarr::start_threads_() +size_t +zarr::Zarr::jobs_on_queue() const { - for (auto& ctx : thread_pool_) { - std::scoped_lock lock(ctx.mutex); - ctx.zarr = this; - ctx.should_stop = false; - ctx.thread = std::thread(worker_thread, &ctx); - ctx.cv.notify_one(); - } + 
std::scoped_lock lock(mutex_); + return jobs_.size(); } void -zarr::Zarr::recover_threads_() +zarr::Zarr::write_all_array_metadata_() const { - for (auto& ctx : thread_pool_) { - { - std::scoped_lock lock(ctx.mutex); - ctx.should_stop = true; - ctx.cv.notify_one(); - } + namespace fs = std::filesystem; - if (ctx.thread.joinable()) { - ctx.thread.join(); - } + for (auto i = 0; i < image_tile_shapes_.size(); ++i) { + write_array_metadata_(i); } } -/// \brief Check that the StorageProperties are valid. -/// \details Assumes either an empty or valid JSON metadata string and a -/// filename string that points to a writable directory. \param props Storage -/// properties for Zarr. \throw std::runtime_error if the parent of the Zarr -/// data directory is not an existing directory. void -zarr::validate_props(const StorageProperties* props) -{ - EXPECT(props->filename.str, "Filename string is NULL."); - EXPECT(props->filename.nbytes, "Filename string is zero size."); - - // check that JSON is correct (throw std::exception if not) - validate_json(props->external_metadata_json.str, - props->external_metadata_json.nbytes); - - // check that the filename value points to a writable directory - { - - auto path = as_path(*props); - auto parent_path = path.parent_path().string(); - if (parent_path.empty()) - parent_path = "."; - - EXPECT(fs::is_directory(parent_path), - "Expected \"%s\" to be a directory.", - parent_path.c_str()); - validate_directory_is_writable(parent_path); +zarr::Zarr::write_multiscale_frames_(const VideoFrame* frame) +{ + const VideoFrame* src = frame; + VideoFrame* dst; + + std::function scale; + std::function average2; + switch (frame->shape.type) { + case SampleType_u10: + case SampleType_u12: + case SampleType_u14: + case SampleType_u16: + scale = ::scale_image; + average2 = ::average_two_frames; + break; + case SampleType_i8: + scale = ::scale_image; + average2 = ::average_two_frames; + break; + case SampleType_i16: + scale = ::scale_image; + average2 = ::average_two_frames; + break; + case SampleType_f32: + scale = ::scale_image; + average2 = ::average_two_frames; + break; + case SampleType_u8: + scale = ::scale_image; + average2 = ::average_two_frames; + break; + default: + char err_msg[64]; + snprintf(err_msg, + sizeof(err_msg), + "Unsupported pixel type: %s", + common::sample_type_to_string(frame->shape.type)); + throw std::runtime_error(err_msg); } -} -/// \brief Get the filename from a StorageProperties as fs::path. -/// \param props StorageProperties for the Zarr Storage device. -/// \return fs::path representation of the Zarr data directory. -fs::path -zarr::as_path(const StorageProperties& props) -{ - return { props.filename.str, - props.filename.str + props.filename.nbytes - 1 }; -} + for (auto i = 1; i < writers_.size(); ++i) { + dst = scale(src); + if (scaled_frames_.at(i).has_value()) { + // average + average2(dst, scaled_frames_.at(i).value()); -/// \brief Check that two ImageShapes are equivalent, i.e., that the data types -/// agree and the dimensions are equal. -/// \param lhs An ImageShape. -/// \param rhs Another ImageShape. -/// \throw std::runtime_error if the ImageShapes have different data types or -/// dimensions. -void -zarr::validate_image_shapes_equal(const ImageShape& lhs, const ImageShape& rhs) -{ - EXPECT(lhs.type == rhs.type, - "Datatype mismatch! Expected: %s. 
Got: %s.", - sample_type_to_string(lhs.type), - sample_type_to_string(rhs.type)); - - EXPECT(lhs.dims.channels == rhs.dims.channels && - lhs.dims.width == rhs.dims.width && - lhs.dims.height == rhs.dims.height, - "Dimension mismatch! Expected: (%d, %d, %d). Got (%d, %d, " - "%d)", - lhs.dims.channels, - lhs.dims.width, - lhs.dims.height, - rhs.dims.channels, - rhs.dims.width, - rhs.dims.height); -} - -/// \brief Get the Zarr dtype for a given SampleType. -/// \param t An enumerated sample type. -/// \throw std::runtime_error if \par t is not a valid SampleType. -/// \return A representation of the SampleType \par t expected by a Zarr reader. -const char* -zarr::sample_type_to_dtype(SampleType t) -{ - static const char* table[] = { "write(dst)); -/// \brief Get a string representation of the SampleType enum. -/// \param t An enumerated sample type. -/// \return A human-readable representation of the SampleType \par t. -const char* -zarr::sample_type_to_string(SampleType t) noexcept -{ - static const char* table[] = { "u8", "u16", "i8", "i16", - "f32", "u16", "u16", "u16" }; - if (t < countof(table)) { - return table[t]; - } else { - return "unrecognized pixel type"; - } -} + // clean up this level of detail + free(scaled_frames_.at(i).value()); + scaled_frames_.at(i).reset(); -/// \brief Get the number of bytes for a given SampleType. -/// \param t An enumerated sample type. -/// \return The number of bytes the SampleType \par t represents. -size_t -zarr::bytes_per_sample_type(SampleType t) noexcept -{ - static size_t table[] = { 1, 2, 1, 2, 4, 2, 2, 2 }; - if (t < countof(table)) { - return table[t]; - } else { - LOGE("Invalid sample type."); - return 0; + // setup for next iteration + if (i + 1 < writers_.size()) { + src = dst; + } else { + free(dst); // FIXME (aliddell): find a way to reuse + } + } else { + scaled_frames_.at(i) = dst; + break; + } } } -/// \brief Check that the JSON string is valid. (Valid can mean empty.) -/// \param str Putative JSON metadata string. -/// \param nbytes Size of the JSON metadata char array -void -zarr::validate_json(const char* str, size_t nbytes) +std::optional +zarr::Zarr::pop_from_job_queue_() noexcept { - // Empty strings are valid (no metadata is fine). - if (nbytes <= 1 || nullptr == str) { - return; + std::scoped_lock lock(mutex_); + if (jobs_.empty()) { + return std::nullopt; } - // Don't do full json validation here, but make sure it at least - // begins and ends with '{' and '}' - EXPECT(nbytes >= 3, - "nbytes (%d) is too small. Expected a null-terminated json string.", - (int)nbytes); - EXPECT(str[nbytes - 1] == '\0', "String must be null-terminated"); - EXPECT(str[0] == '{', "json string must start with \'{\'"); - EXPECT(str[nbytes - 2] == '}', "json string must end with \'}\'"); + auto job = jobs_.front(); + jobs_.pop(); + return job; } -/// \brief Check that the argument is a writable directory. -/// \param path The path to check. -/// \throw std::runtime_error if \par path is either not a directory or not -/// writable. void -zarr::validate_directory_is_writable(const std::string& path) -{ - EXPECT(fs::is_directory(path), - "Expected \"%s\" to be a directory.", - path.c_str()); - - const auto perms = fs::status(fs::path(path)).permissions(); - - EXPECT((perms & (fs::perms::owner_write | fs::perms::group_write | - fs::perms::others_write)) != fs::perms::none, - "Expected \"%s\" to have write permissions.", - path.c_str()); -} - -/// \brief Compute the number of bytes in a frame, given an image shape. 
-/// \param image_shape Description of the image's shape. -/// \return The number of bytes to expect in a frame. -size_t -zarr::get_bytes_per_frame(const ImageShape& image_shape) noexcept -{ - return zarr::bytes_per_sample_type(image_shape.type) * - image_shape.dims.channels * image_shape.dims.height * - image_shape.dims.width * image_shape.dims.planes; -} - -/// \brief Compute the number of bytes in a tile, given an image shape and a -/// tile shape. -/// \param image_shape Description of the image's shape. -/// \param tile_shape Description of the tile's shape. -/// \return The number of bytes to expect in a tile. -size_t -zarr::get_bytes_per_tile(const ImageShape& image_shape, - const TileShape& tile_shape) noexcept -{ - return zarr::bytes_per_sample_type(image_shape.type) * - image_shape.dims.channels * tile_shape.height * tile_shape.width * - tile_shape.planes; -} - -uint32_t -zarr::get_tiles_per_chunk(const ImageShape& image_shape, - const TileShape& tile_shape, - uint64_t max_bytes_per_chunk) noexcept -{ - auto bpt = (float)get_bytes_per_tile(image_shape, tile_shape); - if (0 == bpt) - return 0; - return (uint32_t)std::floor((float)max_bytes_per_chunk / bpt); -} - -size_t -zarr::get_bytes_per_chunk(const ImageShape& image_shape, - const TileShape& tile_shape, - size_t max_bytes_per_chunk) noexcept -{ - return get_bytes_per_tile(image_shape, tile_shape) * - get_tiles_per_chunk(image_shape, tile_shape, max_bytes_per_chunk); -} - -/// \brief Write a string to a file. -/// @param path The path of the file to write. -/// @param str The string to write. -void -zarr::write_string(const std::string& path, const std::string& str) -{ - if (auto p = fs::path(path); !fs::exists(p.parent_path())) - fs::create_directories(p.parent_path()); - - struct file f = { 0 }; - auto is_ok = file_create(&f, path.c_str(), path.size()); - is_ok &= file_write(&f, // file - 0, // offset - (uint8_t*)str.c_str(), // cur - (uint8_t*)(str.c_str() + str.size()) // end - ); - EXPECT(is_ok, "Write to \"%s\" failed.", path.c_str()); - TRACE("Wrote %d bytes to \"%s\".", str.size(), path.c_str()); - file_close(&f); -} - -void -zarr::worker_thread(ThreadContext* ctx) +zarr::Zarr::worker_thread_(ThreadContext* ctx) { using namespace std::chrono_literals; TRACE("Worker thread starting."); - CHECK(ctx); + if (nullptr == ctx) { + LOGE("Null context passed to worker thread."); + return; + } while (true) { std::unique_lock lock(ctx->mutex); - ctx->cv.wait_for(lock, 5ms, [&] { return ctx->should_stop; }); + ctx->cv.wait_for(lock, 1ms, [&] { return ctx->should_stop; }); if (ctx->should_stop) { break; } - if (auto job = ctx->zarr->pop_from_job_queue(); job.has_value()) { - CHECK(job.value()()); + if (auto job = pop_from_job_queue_(); job.has_value()) { + ctx->ready = false; + std::string err_msg; + if (!job.value()(err_msg)) { + set_error(err_msg); + } + ctx->ready = true; + lock.unlock(); + ctx->cv.notify_one(); + } else { + lock.unlock(); + std::this_thread::sleep_for(1ms); } } TRACE("Worker thread exiting."); } -zarr::StorageInterface::StorageInterface() - : Storage{ - .state = DeviceState_AwaitingConfiguration, - .set = ::zarr_set, - .get = ::zarr_get, - .get_meta = ::zarr_get_meta, - .start = ::zarr_start, - .append = ::zarr_append, - .stop = ::zarr_stop, - .destroy = ::zarr_destroy, - .reserve_image_shape = ::zarr_reserve_image_shape, - } -{ +#ifndef NO_UNIT_TESTS +#ifdef _WIN32 +#define acquire_export __declspec(dllexport) +#else +#define acquire_export +#endif + +///< Test that a single frame with 1 plane is padded and 
averaged correctly. +template +void +test_average_frame_inner(const SampleType& stype) +{ + auto* src = (VideoFrame*)malloc(sizeof(VideoFrame) + 9 * sizeof(T)); + src->bytes_of_frame = sizeof(*src) + 9 * sizeof(T); + src->shape = { + .dims = { + .channels = 1, + .width = 3, + .height = 3, + .planes = 1, + }, + .strides = { + .channels = 1, + .width = 1, + .height = 3, + .planes = 9 + }, + .type = stype + }; + + for (auto i = 0; i < 9; ++i) { + ((T*)src->data)[i] = (T)(i + 1); + } + + auto dst = scale_image(src); + CHECK(((T*)dst->data)[0] == (T)3); + CHECK(((T*)dst->data)[1] == (T)4.5); + CHECK(((T*)dst->data)[2] == (T)7.5); + CHECK(((T*)dst->data)[3] == (T)9); + + free(src); + free(dst); } -extern "C" struct Storage* -zarr_init() +extern "C" { - try { - return new zarr::Zarr(); - } catch (const std::exception& exc) { - LOGE("Exception: %s\n", exc.what()); - } catch (...) { - LOGE("Exception: (unknown)"); + acquire_export int unit_test__average_frame() + { + try { + test_average_frame_inner(SampleType_u8); + test_average_frame_inner(SampleType_i8); + test_average_frame_inner(SampleType_u16); + test_average_frame_inner(SampleType_i16); + test_average_frame_inner(SampleType_f32); + } catch (const std::exception& exc) { + LOGE("Exception: %s\n", exc.what()); + return 0; + } catch (...) { + LOGE("Exception: (unknown)"); + return 0; + } + + return 1; } - return nullptr; } +#endif diff --git a/src/zarr.driver.c b/src/zarr.driver.c index a4dfa402..75ae89ff 100644 --- a/src/zarr.driver.c +++ b/src/zarr.driver.c @@ -34,11 +34,17 @@ // // The deallocate themselves when their `destroy()` method is called. struct Storage* -zarr_init(); +zarr_v2_init(); struct Storage* -compressed_zarr_zstd_init(); +compressed_zarr_v2_zstd_init(); struct Storage* -compressed_zarr_lz4_init(); +compressed_zarr_v2_lz4_init(); +struct Storage* +zarr_v3_init(); +struct Storage* +compressed_zarr_v3_zstd_init(); +struct Storage* +compressed_zarr_v3_lz4_init(); // // GLOBALS @@ -49,6 +55,9 @@ enum StorageKind Storage_Zarr, Storage_ZarrBlosc1ZstdByteShuffle, Storage_ZarrBlosc1Lz4ByteShuffle, + Storage_ZarrV3, + Storage_ZarrV3Blosc1ZstdByteShuffle, + Storage_ZarrV3Blosc1Lz4ByteShuffle, Storage_Number_Of_Kinds }; @@ -71,6 +80,9 @@ storage_kind_to_string(const enum StorageKind kind) CASE(Storage_Zarr); CASE(Storage_ZarrBlosc1ZstdByteShuffle); CASE(Storage_ZarrBlosc1Lz4ByteShuffle); + CASE(Storage_ZarrV3); + CASE(Storage_ZarrV3Blosc1ZstdByteShuffle); + CASE(Storage_ZarrV3Blosc1Lz4ByteShuffle); #undef CASE default: return "(unknown)"; @@ -99,6 +111,9 @@ zarr_describe(const struct Driver* driver, XXX(Zarr), XXX(ZarrBlosc1ZstdByteShuffle), XXX(ZarrBlosc1Lz4ByteShuffle), + XXX(ZarrV3), + XXX(ZarrV3Blosc1ZstdByteShuffle), + XXX(ZarrV3Blosc1Lz4ByteShuffle), }; // clang-format on #undef XXX @@ -157,9 +172,13 @@ acquire_driver_init_v0(acquire_reporter_t reporter) sizeof(globals.constructors[0]) * Storage_Number_Of_Kinds; CHECK(globals.constructors = (struct Storage * (**)()) malloc(nbytes)); struct Storage* (*impls[])() = { - [Storage_Zarr] = zarr_init, - [Storage_ZarrBlosc1ZstdByteShuffle] = compressed_zarr_zstd_init, - [Storage_ZarrBlosc1Lz4ByteShuffle] = compressed_zarr_lz4_init, + [Storage_Zarr] = zarr_v2_init, + [Storage_ZarrBlosc1ZstdByteShuffle] = compressed_zarr_v2_zstd_init, + [Storage_ZarrBlosc1Lz4ByteShuffle] = compressed_zarr_v2_lz4_init, + [Storage_ZarrV3] = zarr_v3_init, + [Storage_ZarrV3Blosc1ZstdByteShuffle] = + compressed_zarr_v3_zstd_init, + [Storage_ZarrV3Blosc1Lz4ByteShuffle] = compressed_zarr_v3_lz4_init, }; memcpy( 
globals.constructors, impls, nbytes); // cppcheck-suppress uninitvar diff --git a/src/zarr.encoder.cpp b/src/zarr.encoder.cpp deleted file mode 100644 index ac951b5b..00000000 --- a/src/zarr.encoder.cpp +++ /dev/null @@ -1,91 +0,0 @@ -#include "zarr.encoder.hh" - -#include -#include - -#ifdef min -#undef min -#endif - -namespace fs = std::filesystem; - -namespace acquire::sink::zarr { - -BaseEncoder::BaseEncoder() - : cursor_{ 0 } - , bytes_per_pixel_{ 1 } - , file_{ nullptr } -{ -} - -void -BaseEncoder::set_bytes_per_pixel(size_t bpp) -{ - bytes_per_pixel_ = bpp; -} - -void -BaseEncoder::allocate_buffer(size_t buf_size) -{ - buf_.resize(buf_size); - cursor_ = 0; -} - -size_t -BaseEncoder::write(const uint8_t* beg, const uint8_t* end) -{ - /* - Some cases: - 1. The buffer already has some data in it. - => Fill it. If full flush. - 2. The buffer is empty. - => if (end-beg) > capacity_, just write capacity_ bytes directly. - Bypass the buffer and avoid a copy. - => Otherwise append [beg,end) to the buffer - - At the end, flush if the buffer is full and if there are any bytes - remaining, try again. - */ - - for (const uint8_t* cur = beg; cur < end;) { - if (buf_.empty() && (end - cur) >= buf_.size()) { - cur += write(cur, cur + buf_.size()); - } else { - // The buffer has some data in it, or we haven't pushed enough - // data to fill it. - size_t remaining = buf_.size() - cursor_; - const uint8_t* fitting_end = std::min(cur + remaining, end); - std::copy(cur, fitting_end, buf_.data() + cursor_); - - cursor_ += fitting_end - cur; - cur = fitting_end; - } - - if (buf_.size() == cursor_) { - flush(); - } - } - - return end - beg; -} - -size_t -BaseEncoder::flush() -{ - if (0 == cursor_) - return 0; - - EXPECT(nullptr != file_, "Data on buffer, but no file to flush to."); - - size_t nbytes_out = flush_impl(); - cursor_ = 0; - - return nbytes_out; -} - -void -BaseEncoder::set_file(struct file* file_handle) -{ - file_ = file_handle; -} -} // namespace acquire::sink::zarr \ No newline at end of file diff --git a/src/zarr.encoder.hh b/src/zarr.encoder.hh deleted file mode 100644 index c7b57883..00000000 --- a/src/zarr.encoder.hh +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef H_ACQUIRE_STORAGE_ZARR_ENCODER_V0 -#define H_ACQUIRE_STORAGE_ZARR_ENCODER_V0 - -#ifdef __cplusplus - -#include -#include - -#include "platform.h" - -#include "prelude.h" -#include "json.hpp" - -namespace acquire::sink::zarr { - -struct BaseEncoder -{ - public: - BaseEncoder(); - virtual ~BaseEncoder() noexcept = default; - - size_t write(const uint8_t* beg, const uint8_t* end); - size_t flush(); - void set_bytes_per_pixel(size_t bpp); - void allocate_buffer(size_t buf_size); - - virtual void set_file(struct file* file_handle); - - protected: - std::vector buf_; - size_t cursor_; - size_t bytes_per_pixel_; - std::string path_; - struct file* file_; // non-owning - - virtual size_t flush_impl() = 0; -}; -} // namespace acquire::sink::zarr - -#endif // __cplusplus -#endif // H_ACQUIRE_STORAGE_ZARR_ENCODER_V0 diff --git a/src/zarr.hh b/src/zarr.hh index d9e88fec..f9748a31 100644 --- a/src/zarr.hh +++ b/src/zarr.hh @@ -1,41 +1,27 @@ #ifndef H_ACQUIRE_STORAGE_ZARR_V0 #define H_ACQUIRE_STORAGE_ZARR_V0 +#ifndef __cplusplus +#error "This header requires C++20" +#endif + #include "device/kit/storage.h" -#include "platform.h" -#include "logger.h" #include "prelude.h" -#include "chunk.writer.hh" -#include "frame.scaler.hh" +#include "common.hh" +#include "writers/writer.hh" +#include "writers/blosc.compressor.hh" -#include #include +#include 
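For context on the `zarr.driver.c` hunk that ends just above: each new `ZarrV3*` storage kind is wired up by adding one entry to the constructor table that `acquire_driver_init_v0` copies into the driver globals. The C file uses designated array initializers; an equivalent C++ sketch of the same registration idea (factory and enum names are taken from the patch, the container choice and helper are ours):

```
#include <array>

// Stand-ins for the driver's types; the real declarations live in
// zarr.driver.c and the acquire device kit.
struct Storage;
Storage* zarr_v2_init();
Storage* zarr_v3_init();

enum StorageKind
{
    Storage_Zarr,
    Storage_ZarrV3,
    Storage_Number_Of_Kinds
};

using StorageFactory = Storage* (*)();

// One constructor slot per storage kind, indexed by the enum value.
std::array<StorageFactory, Storage_Number_Of_Kinds> make_factory_table()
{
    std::array<StorageFactory, Storage_Number_Of_Kinds> table{};
    table[Storage_Zarr] = zarr_v2_init;
    table[Storage_ZarrV3] = zarr_v3_init;
    return table;
}
```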
#include -#include -#include -#include -#include -#include +#include +#include // std::pair #include -#ifndef __cplusplus -#error "This header requires C++20" -#endif +namespace fs = std::filesystem; namespace acquire::sink::zarr { - -struct Zarr; - -struct ThreadContext -{ - Zarr* zarr; - std::thread thread; - std::mutex mutex; - std::condition_variable cv; - bool should_stop; -}; - // StorageInterface struct StorageInterface : public Storage @@ -55,131 +41,93 @@ struct StorageInterface : public Storage virtual void reserve_image_shape(const ImageShape* shape) = 0; }; -/// \brief Zarr writer that conforms to v0.4 of the OME-NGFF specification. -/// -/// This writes one multi-scale zarr image with one level/scale using the -/// OME-NGFF specification to determine the directory structure and contents -/// of group and array attributes. -/// -/// https://ngff.openmicroscopy.org/0.4/ -struct Zarr final : StorageInterface +struct Zarr : StorageInterface { - using JobT = std::function; + public: + using JobT = std::function; + struct ThreadContext + { + std::thread thread; + std::mutex mutex; + std::condition_variable cv; + bool should_stop; + bool ready; + }; Zarr(); - explicit Zarr(CompressionParams&& compression_params); - ~Zarr() override; + Zarr(BloscCompressionParams&& compression_params); + ~Zarr() noexcept override; + /// StorageInterface void set(const StorageProperties* props) override; void get(StorageProperties* props) const override; - void get_meta(StoragePropertyMetadata* meta) const override; void start() override; - [[nodiscard]] int stop() noexcept override; - - /// @return number of consumed bytes + int stop() noexcept override; size_t append(const VideoFrame* frames, size_t nbytes) override; - void reserve_image_shape(const ImageShape* shape) override; - void push_frame_to_writers(const std::shared_ptr frame); - std::optional pop_from_job_queue(); + /// Error state + void set_error(const std::string& msg) noexcept; + + /// Multithreading + void push_to_job_queue(JobT&& job); + size_t jobs_on_queue() const; - private: + protected: using ChunkingProps = StorageProperties::storage_properties_chunking_s; using ChunkingMeta = StoragePropertyMetadata::storage_property_metadata_chunking_s; - // static - set on construction - char dimension_separator_; - std::optional compression_params_; - std::vector thread_pool_; + /// static - set on construction + std::optional blosc_compression_params_; - // changes on set() - std::string data_dir_; + /// changes on set + fs::path dataset_root_; std::string external_metadata_json_; PixelScale pixel_scale_um_; uint64_t max_bytes_per_chunk_; - ImageShape image_shape_; - TileShape tile_shape_; bool enable_multiscale_; - /// Downsampling of incoming frames. 
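The refactored `Zarr` base class above replaces the old free-standing `worker_thread()` with a mutex-guarded job queue drained by per-context worker threads, with off-thread exceptions trapped and surfaced through `set_error()` / `error_msg_` (see the CHANGELOG entry on off-thread exceptions). A minimal sketch of the producer/consumer pattern those declarations imply follows; every name in it is hypothetical, the job signature is assumed, and the real logic lives in the driver sources, not in this patch.

```cpp
// Minimal sketch of the job-queue / worker-thread pattern implied by the
// declarations in zarr.hh. All names are hypothetical; this is not the
// driver's implementation.
#include <condition_variable>
#include <exception>
#include <functional>
#include <mutex>
#include <queue>
#include <string>
#include <utility>

struct JobQueueSketch
{
    using Job = std::function<bool()>; // assumed job signature

    void push(Job&& job)
    {
        {
            std::scoped_lock lock(mutex_);
            jobs_.push(std::move(job));
        }
        cv_.notify_one();
    }

    void stop()
    {
        {
            std::scoped_lock lock(mutex_);
            should_stop_ = true;
        }
        cv_.notify_all();
    }

    // Body of each worker thread: pop a job, run it, and trap any exception
    // so it never escapes the thread.
    void worker()
    {
        for (;;) {
            std::unique_lock lock(mutex_);
            cv_.wait(lock, [&] { return should_stop_ || !jobs_.empty(); });
            if (should_stop_ && jobs_.empty())
                return;
            Job job = std::move(jobs_.front());
            jobs_.pop();
            lock.unlock();

            try {
                if (!job())
                    set_error_("job returned failure");
            } catch (const std::exception& exc) {
                set_error_(exc.what());
            } catch (...) {
                set_error_("unknown exception");
            }
        }
    }

  private:
    void set_error_(const std::string& msg)
    {
        std::scoped_lock lock(mutex_);
        error_ = true;
        error_msg_ = msg; // surfaced later, e.g., on the next append()
    }

    std::queue<Job> jobs_;
    std::mutex mutex_;
    std::condition_variable cv_;
    bool should_stop_{ false };
    bool error_{ false };
    std::string error_msg_;
};
```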
- std::optional frame_scaler_; + /// changes on reserve_image_shape + std::vector> image_tile_shapes_; + SampleType pixel_type_; + std::vector> writers_; - /// Chunk writers for each layer/scale - std::map>> writers_; + /// changes on append + // scaled frames, keyed by level-of-detail + std::unordered_map> scaled_frames_; - // changes during acquisition - uint32_t frame_count_; - mutable std::mutex job_queue_mutex_; - std::queue job_queue_; + /// Multithreading + std::vector threads_; + mutable std::mutex mutex_; // for jobs_ and error_ / error_msg_ + std::queue jobs_; - void set_chunking(const ChunkingProps& props, const ChunkingMeta& meta); - - void create_data_directory_() const; - void write_zarray_json_() const; - void write_zarray_json_inner_(size_t layer, - const ImageShape& image_shape, - const TileShape& tile_shape) const; - void write_external_metadata_json_() const; - void write_zgroup_json_() const; - void write_group_zattrs_json_() const; - - void allocate_writers_(); - void validate_image_and_tile_shapes_() const; - - void start_threads_(); - void recover_threads_(); -}; - -// utilities - -void -validate_props(const StorageProperties* props); - -std::filesystem::path -as_path(const StorageProperties& props); + /// Error state + bool error_; + std::string error_msg_; -void -validate_image_shapes_equal(const ImageShape& lhs, const ImageShape& rhs); - -const char* -sample_type_to_dtype(SampleType t); - -const char* -sample_type_to_string(SampleType t) noexcept; - -size_t -bytes_per_sample_type(SampleType t) noexcept; - -void -validate_json(const char* str, size_t nbytes); - -void -validate_directory_is_writable(const std::string& path); - -size_t -get_bytes_per_frame(const ImageShape& image_shape) noexcept; + /// Setup + void set_chunking(const ChunkingProps& props, const ChunkingMeta& meta); + virtual void allocate_writers_() = 0; -size_t -get_bytes_per_tile(const ImageShape& image_shape, - const TileShape& tile_shape) noexcept; + /// Metadata + void write_all_array_metadata_() const; + virtual void write_array_metadata_(size_t level) const = 0; + virtual void write_external_metadata_() const = 0; + virtual void write_base_metadata_() const = 0; + virtual void write_group_metadata_() const = 0; -uint32_t -get_tiles_per_chunk(const ImageShape& image_shape, - const TileShape& tile_shape, - uint64_t max_bytes_per_chunk) noexcept; + /// Filesystem + virtual fs::path get_data_directory_() const = 0; -size_t -get_bytes_per_chunk(const ImageShape& image_shape, - const TileShape& tile_shape, - size_t max_bytes_per_chunk) noexcept; + /// Multiscale + void write_multiscale_frames_(const VideoFrame* frame); -void -write_string(const std::string& path, const std::string& str); + /// Multithreading + std::optional pop_from_job_queue_() noexcept; + void worker_thread_(ThreadContext* ctx); +}; -void -worker_thread(ThreadContext* ctx); } // namespace acquire::sink::zarr #endif // H_ACQUIRE_STORAGE_ZARR_V0 diff --git a/src/zarr.raw.cpp b/src/zarr.raw.cpp deleted file mode 100644 index 9552d59e..00000000 --- a/src/zarr.raw.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "zarr.raw.hh" - -namespace acquire::sink::zarr { -RawEncoder::RawEncoder() - : file_offset_{ 0 } -{ -} - -void - RawEncoder::set_file(struct file* file_handle) -{ - BaseEncoder::set_file(file_handle); - file_offset_ = 0; -} - -size_t -RawEncoder::flush_impl() -{ - CHECK(file_write(file_, file_offset_, buf_.data(), buf_.data() + cursor_)); - file_offset_ += cursor_; - - return cursor_; -} -} // namespace acquire::sink::zarr \ No 
newline at end of file diff --git a/src/zarr.raw.hh b/src/zarr.raw.hh deleted file mode 100644 index 376e5912..00000000 --- a/src/zarr.raw.hh +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef ACQUIRE_STORAGE_ZARR_RAW_HH -#define ACQUIRE_STORAGE_ZARR_RAW_HH - -#ifdef __cplusplus - -#include "zarr.encoder.hh" - -namespace acquire::sink::zarr { - -struct RawEncoder final : public BaseEncoder -{ - public: - RawEncoder(); - - void set_file(struct file* file_handle) override; - - private: - size_t file_offset_; - - size_t flush_impl() override; -}; -} // namespace acquire::sink::zarr - -#endif // __cplusplus -#endif // ACQUIRE_STORAGE_ZARR_RAW_HH diff --git a/src/zarr.v2.cpp b/src/zarr.v2.cpp new file mode 100644 index 00000000..256faeaf --- /dev/null +++ b/src/zarr.v2.cpp @@ -0,0 +1,268 @@ +#include "zarr.v2.hh" +#include "writers/chunk.writer.hh" + +#include "json.hpp" + +namespace zarr = acquire::sink::zarr; + +namespace { +template +struct Storage* +compressed_zarr_v2_init() +{ + try { + zarr::BloscCompressionParams params( + zarr::compression_codec_as_string(), 1, 1); + return new zarr::ZarrV2(std::move(params)); + } catch (const std::exception& exc) { + LOGE("Exception: %s\n", exc.what()); + } catch (...) { + LOGE("Exception: (unknown)"); + } + return nullptr; +} +} // end ::{anonymous} namespace + +/// ZarrV2 +zarr::ZarrV2::ZarrV2(BloscCompressionParams&& compression_params) + : Zarr(std::move(compression_params)) +{ +} + +void +zarr::ZarrV2::get_meta(StoragePropertyMetadata* meta) const +{ + CHECK(meta); + *meta = { + .chunking = { + .supported = 1, + .max_bytes_per_chunk = { + .writable = 1, + .low = (float)(16 << 20), + .high = (float)(1 << 30), + .type = PropertyType_FixedPrecision }, + }, + .multiscale = { + .supported = 1, + } + }; +} + +void +zarr::ZarrV2::allocate_writers_() +{ + writers_.clear(); + for (auto i = 0; i < image_tile_shapes_.size(); ++i) { + const auto& image_shape = image_tile_shapes_.at(i).first; + const auto& tile_shape = image_tile_shapes_.at(i).second; + uint64_t bytes_per_tile = + common::bytes_per_tile(tile_shape, pixel_type_); + if (blosc_compression_params_.has_value()) { + writers_.push_back(std::make_shared( + image_shape, + tile_shape, + (uint32_t)(max_bytes_per_chunk_ / bytes_per_tile), + (get_data_directory_() / std::to_string(i)).string(), + this, + blosc_compression_params_.value())); + } else { + writers_.push_back(std::make_shared( + image_shape, + tile_shape, + (uint32_t)(max_bytes_per_chunk_ / bytes_per_tile), + (get_data_directory_() / std::to_string(i)).string(), + this)); + } + } +} + +void +zarr::ZarrV2::write_array_metadata_(size_t level) const +{ + namespace fs = std::filesystem; + using json = nlohmann::json; + + if (writers_.size() <= level) { + return; + } + + const ImageDims& image_dims = image_tile_shapes_.at(level).first; + const ImageDims& tile_dims = image_tile_shapes_.at(level).second; + + const auto frame_count = (uint64_t)writers_.at(level)->frames_written(); + const auto frames_per_chunk = + std::min(frame_count, + (uint64_t)common::frames_per_chunk( + tile_dims, pixel_type_, max_bytes_per_chunk_)); + + json zarray_attrs = { + { "zarr_format", 2 }, + { "shape", + { + frame_count, // t + // TODO (aliddell): c? + 1, // z + image_dims.rows, // y + image_dims.cols, // x + } }, + { "chunks", + { + frames_per_chunk, // t + // TODO (aliddell): c? 
+ 1, // z + tile_dims.rows, // y + tile_dims.cols, // x + } }, + { "dtype", common::sample_type_to_dtype(pixel_type_) }, + { "fill_value", 0 }, + { "order", "C" }, + { "filters", nullptr }, + { "dimension_separator", "/" }, + }; + + if (blosc_compression_params_.has_value()) { + zarray_attrs["compressor"] = blosc_compression_params_.value(); + } else { + zarray_attrs["compressor"] = nullptr; + } + + std::string zarray_path = + (dataset_root_ / std::to_string(level) / ".zarray").string(); + common::write_string(zarray_path, zarray_attrs.dump()); +} + +void +zarr::ZarrV2::write_external_metadata_() const +{ + namespace fs = std::filesystem; + using json = nlohmann::json; + + std::string zattrs_path = (dataset_root_ / "0" / ".zattrs").string(); + common::write_string(zattrs_path, external_metadata_json_); +} + +void +zarr::ZarrV2::write_base_metadata_() const +{ + namespace fs = std::filesystem; + using json = nlohmann::json; + + const json zgroup = { { "zarr_format", 2 } }; + std::string zgroup_path = (dataset_root_ / ".zgroup").string(); + common::write_string(zgroup_path, zgroup.dump()); +} + +void +zarr::ZarrV2::write_group_metadata_() const +{ + namespace fs = std::filesystem; + using json = nlohmann::json; + + json zgroup_attrs; + zgroup_attrs["multiscales"] = json::array({ json::object() }); + zgroup_attrs["multiscales"][0]["version"] = "0.4"; + zgroup_attrs["multiscales"][0]["axes"] = { + { + { "name", "t" }, + { "type", "time" }, + }, + { + { "name", "c" }, + { "type", "channel" }, + }, + { + { "name", "y" }, + { "type", "space" }, + { "unit", "micrometer" }, + }, + { + { "name", "x" }, + { "type", "space" }, + { "unit", "micrometer" }, + }, + }; + + // spatial multiscale metadata + if (writers_.empty()) { + zgroup_attrs["multiscales"][0]["datasets"] = { + { + { "path", "0" }, + { "coordinateTransformations", + { + { + { "type", "scale" }, + { "scale", { 1, 1, pixel_scale_um_.y, pixel_scale_um_.x } }, + }, + } }, + }, + }; + } else { + for (auto i = 0; i < writers_.size(); ++i) { + zgroup_attrs["multiscales"][0]["datasets"].push_back({ + { "path", std::to_string(i) }, + { "coordinateTransformations", + { + { + { "type", "scale" }, + { + "scale", + { + std::pow(2, i), // t + // TODO (aliddell): c? + 1, // z + std::pow(2, i) * pixel_scale_um_.y, // y + std::pow(2, i) * pixel_scale_um_.x // x + }, + }, + }, + } }, + }); + } + + // downsampling metadata + zgroup_attrs["multiscales"][0]["type"] = "local_mean"; + zgroup_attrs["multiscales"][0]["metadata"] = { + { "description", + "The fields in the metadata describe how to reproduce this " + "multiscaling in scikit-image. The method and its parameters are " + "given here." }, + { "method", "skimage.transform.downscale_local_mean" }, + { "version", "0.21.0" }, + { "args", "[2]" }, + { "kwargs", { "cval", 0 } }, + }; + } + + std::string zattrs_path = (dataset_root_ / ".zattrs").string(); + common::write_string(zattrs_path, zgroup_attrs.dump(4)); +} + +fs::path +zarr::ZarrV2::get_data_directory_() const +{ + return dataset_root_; +} + +extern "C" +{ + struct Storage* zarr_v2_init() + { + try { + return new zarr::ZarrV2(); + } catch (const std::exception& exc) { + LOGE("Exception: %s\n", exc.what()); + } catch (...) 
{ + LOGE("Exception: (unknown)"); + } + return nullptr; + } + struct Storage* compressed_zarr_v2_zstd_init() + { + return compressed_zarr_v2_init(); + } + + struct Storage* compressed_zarr_v2_lz4_init() + { + return compressed_zarr_v2_init(); + } +} diff --git a/src/zarr.v2.hh b/src/zarr.v2.hh new file mode 100644 index 00000000..d281b207 --- /dev/null +++ b/src/zarr.v2.hh @@ -0,0 +1,32 @@ +#ifndef H_ACQUIRE_STORAGE_ZARR_V2_V0 +#define H_ACQUIRE_STORAGE_ZARR_V2_V0 + +#include "zarr.hh" + +namespace acquire::sink::zarr { +struct ZarrV2 final : public Zarr +{ + public: + ZarrV2() = default; + ZarrV2(BloscCompressionParams&& compression_params); + ~ZarrV2() override = default; + + /// StorageInterface + void get_meta(StoragePropertyMetadata* meta) const override; + + private: + /// Setup + void allocate_writers_() override; + + /// Metadata + void write_array_metadata_(size_t level) const override; + void write_external_metadata_() const override; + void write_base_metadata_() const override; + void write_group_metadata_() const override; + + /// Filesystem + fs::path get_data_directory_() const override; +}; +} // namespace acquire::sink::zarr + +#endif // H_ACQUIRE_STORAGE_ZARR_V2_V0 diff --git a/src/zarr.v3.cpp b/src/zarr.v3.cpp new file mode 100644 index 00000000..243f8262 --- /dev/null +++ b/src/zarr.v3.cpp @@ -0,0 +1,299 @@ +#include "zarr.v3.hh" +#include "writers/shard.writer.hh" + +#include "json.hpp" + +#include + +namespace zarr = acquire::sink::zarr; + +namespace { +template +struct Storage* +compressed_zarr_v3_init() +{ + try { + zarr::BloscCompressionParams params( + zarr::compression_codec_as_string(), 1, 1); + return new zarr::ZarrV3(std::move(params)); + } catch (const std::exception& exc) { + LOGE("Exception: %s\n", exc.what()); + } catch (...) 
{ + LOGE("Exception: (unknown)"); + } + return nullptr; +} + +uint32_t +smallest_prime_factor(uint32_t n) +{ + if (n < 2) { + return 1; + } else if (n % 2 == 0) { + return 2; + } + + // collect additional primes + std::vector primes = { 3, 5, 7, 11, 13, 17, 19, 23 }; + for (auto i = 27; i * i <= n; i += 2) { + bool is_prime = true; + for (auto p : primes) { + if (i % p == 0) { + is_prime = false; + break; + } + } + if (is_prime) { + primes.push_back(i); + } + } + + for (auto p : primes) { + if (n % p == 0) { + return p; + } + } + + return n; +} + +zarr::ImageDims +make_shard_dims(const zarr::ImageDims& frame_dims, + const zarr::ImageDims& tile_dims) +{ + zarr::ImageDims shard_dims = { + .cols = frame_dims.cols, + .rows = frame_dims.rows, + }; + + const auto h_rat = (float)frame_dims.rows / (float)tile_dims.rows; + auto shard_rows = (uint32_t)std::ceil(h_rat * tile_dims.rows); + if (shard_rows > frame_dims.rows) { + auto n_shards_rows = smallest_prime_factor(shard_rows / tile_dims.rows); + shard_dims.rows = n_shards_rows * tile_dims.rows; + } + + const auto w_rat = (float)frame_dims.cols / (float)tile_dims.cols; + auto shard_cols = (uint32_t)std::ceil(w_rat * tile_dims.cols); + if (shard_cols > frame_dims.cols) { + auto n_shards_cols = smallest_prime_factor(shard_cols / tile_dims.cols); + shard_dims.cols = n_shards_cols * tile_dims.cols; + } + + return shard_dims; +} +} // end ::{anonymous} namespace + +zarr::ZarrV3::ZarrV3(BloscCompressionParams&& compression_params) + : Zarr(std::move(compression_params)) + , shard_dims_{} +{ +} + +void +zarr::ZarrV3::allocate_writers_() +{ + const ImageDims& frame_dims = image_tile_shapes_.at(0).first; + const ImageDims& tile_dims = image_tile_shapes_.at(0).second; + shard_dims_ = make_shard_dims(frame_dims, tile_dims); + + uint64_t bytes_per_tile = common::bytes_per_tile(tile_dims, pixel_type_); + + writers_.clear(); + if (blosc_compression_params_.has_value()) { + writers_.push_back(std::make_shared( + frame_dims, + shard_dims_, + tile_dims, + (uint32_t)(max_bytes_per_chunk_ / bytes_per_tile), + (get_data_directory_() / "0").string(), + this, + blosc_compression_params_.value())); + } else { + writers_.push_back(std::make_shared( + frame_dims, + shard_dims_, + tile_dims, + (uint32_t)(max_bytes_per_chunk_ / bytes_per_tile), + (get_data_directory_() / "0").string(), + this)); + } +} + +void +zarr::ZarrV3::get_meta(StoragePropertyMetadata* meta) const +{ + CHECK(meta); + *meta = { + .chunking = { + .supported = 1, + .max_bytes_per_chunk = { + .writable = 1, + .low = (float)(16 << 20), + .high = (float)(1 << 30), + .type = PropertyType_FixedPrecision }, + }, + .multiscale = { + .supported = 0, + } + }; +} + +void +zarr::ZarrV3::write_array_metadata_(size_t level) const +{ + namespace fs = std::filesystem; + using json = nlohmann::json; + + if (writers_.size() <= level) { + return; + } + + const ImageDims& image_dims = image_tile_shapes_.at(level).first; + const ImageDims& tile_dims = image_tile_shapes_.at(level).second; + + const uint64_t frame_count = writers_.at(level)->frames_written(); + const auto frames_per_chunk = + std::min(frame_count, + (uint64_t)common::frames_per_chunk( + tile_dims, pixel_type_, max_bytes_per_chunk_)); + + json metadata; + metadata["attributes"] = json::object(); + metadata["chunk_grid"] = json::object({ + { "chunk_shape", + json::array({ + frames_per_chunk, // t + // TODO (aliddell): c? 
+ 1, // z + tile_dims.rows, // y + tile_dims.cols, // x + }) }, + { "separator", "/" }, + { "type", "regular" }, + }); + metadata["chunk_memory_layout"] = "C"; + metadata["data_type"] = common::sample_type_to_dtype(pixel_type_); + metadata["extensions"] = json::array(); + metadata["fill_value"] = 0; + metadata["shape"] = json::array({ + frame_count, // t + // TODO (aliddell): c? + 1, // z + image_dims.rows, // y + image_dims.cols, // x + }); + + if (blosc_compression_params_.has_value()) { + auto params = blosc_compression_params_.value(); + metadata["compressor"] = json::object({ + { "codec", "https://purl.org/zarr/spec/codec/blosc/1.0" }, + { "configuration", + json::object({ + { "blocksize", 0 }, + { "clevel", params.clevel }, + { "cname", params.codec_id }, + { "shuffle", params.shuffle }, + }) }, + }); + } + + // sharding storage transformer + // TODO (aliddell): + // https://github.com/zarr-developers/zarr-python/issues/877 + metadata["storage_transformers"] = json::array(); + metadata["storage_transformers"][0] = json::object({ + { "type", "indexed" }, + { "extension", + "https://purl.org/zarr/spec/storage_transformers/sharding/1.0" }, + { "configuration", + json::object({ + { "chunks_per_shard", + json::array({ + 1, // t + // TODO (aliddell): c? + 1, // z + shard_dims_.rows / tile_dims.rows, // y + shard_dims_.cols / tile_dims.cols, // x + }) }, + }) }, + }); + + auto path = (dataset_root_ / "meta" / "root" / + (std::to_string(level) + ".array.json")) + .string(); + common::write_string(path, metadata.dump(4)); +} + +/// @brief Write the external metadata. +/// @details This is a no-op for ZarrV3. Instead, external metadata is +/// stored in the group metadata. +void +zarr::ZarrV3::write_external_metadata_() const +{ + // no-op +} + +/// @brief Write the metadata for the dataset. +void +zarr::ZarrV3::write_base_metadata_() const +{ + namespace fs = std::filesystem; + using json = nlohmann::json; + + json metadata; + metadata["extensions"] = json::array(); + metadata["metadata_encoding"] = + "https://purl.org/zarr/spec/protocol/core/3.0"; + metadata["metadata_key_suffix"] = ".json"; + metadata["zarr_format"] = "https://purl.org/zarr/spec/protocol/core/3.0"; + + auto path = (dataset_root_ / "zarr.json").string(); + common::write_string(path, metadata.dump(4)); +} + +/// @brief Write the metadata for the group. +/// @details Zarr v3 stores group metadata in +/// /meta/{group_name}.group.json. We will call the group "root". +void +zarr::ZarrV3::write_group_metadata_() const +{ + namespace fs = std::filesystem; + using json = nlohmann::json; + + json metadata; + metadata["attributes"]["acquire"] = json::parse(external_metadata_json_); + + auto path = (dataset_root_ / "meta" / "root.group.json").string(); + common::write_string(path, metadata.dump(4)); +} + +fs::path +zarr::ZarrV3::get_data_directory_() const +{ + return dataset_root_ / "data" / "root"; +} + +extern "C" +{ + struct Storage* zarr_v3_init() + { + try { + return new zarr::ZarrV3(); + } catch (const std::exception& exc) { + LOGE("Exception: %s\n", exc.what()); + } catch (...) 
{ + LOGE("Exception: (unknown)"); + } + return nullptr; + } + struct Storage* compressed_zarr_v3_zstd_init() + { + return compressed_zarr_v3_init(); + } + + struct Storage* compressed_zarr_v3_lz4_init() + { + return compressed_zarr_v3_init(); + } +} diff --git a/src/zarr.v3.hh b/src/zarr.v3.hh new file mode 100644 index 00000000..1a6ec919 --- /dev/null +++ b/src/zarr.v3.hh @@ -0,0 +1,33 @@ +#ifndef H_ACQUIRE_STORAGE_ZARR_V3_V0 +#define H_ACQUIRE_STORAGE_ZARR_V3_V0 + +#include "zarr.hh" + +namespace acquire::sink::zarr { +struct ZarrV3 final : public Zarr +{ + public: + ZarrV3() = default; + ZarrV3(BloscCompressionParams&& compression_params); + ~ZarrV3() override = default; + + /// StorageInterface + void get_meta(StoragePropertyMetadata* meta) const override; + + private: + ImageDims shard_dims_; + + /// Setup + void allocate_writers_() override; + + /// Metadata + void write_array_metadata_(size_t level) const override; + void write_external_metadata_() const override; + void write_base_metadata_() const override; + void write_group_metadata_() const override; + + /// Filesystem + fs::path get_data_directory_() const override; +}; +} // namespace acquire::sink::zarr +#endif // H_ACQUIRE_STORAGE_ZARR_V3_V0 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d8b4d959..3d479530 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,6 +27,8 @@ else() write-zarr-raw-with-chunking write-zarr-raw-with-chunking-and-rollover write-zarr-raw-with-ragged-tiling + write-zarr-v3-compressed + write-zarr-v3-raw write-zarr-with-defaults write-zarr-with-lz4-compression write-zarr-with-zstd-compression diff --git a/tests/get-meta.cpp b/tests/get-meta.cpp index 9ed0d09e..e12f88fe 100644 --- a/tests/get-meta.cpp +++ b/tests/get-meta.cpp @@ -86,7 +86,9 @@ main() CHECK(Device_Ok == storage_get_meta(storage, &metadata)); CHECK(metadata.chunking.supported); - ASSERT_EQ(int, "%d", PropertyType_FixedPrecision, + ASSERT_EQ(int, + "%d", + PropertyType_FixedPrecision, (int)metadata.chunking.max_bytes_per_chunk.type); // minimum cap on chunk size is 16 MiB ASSERT_EQ(int, @@ -99,7 +101,8 @@ main() 1 << 30, (int)metadata.chunking.max_bytes_per_chunk.high); - CHECK(metadata.multiscale.supported); + CHECK((bool)metadata.multiscale.supported != + name.starts_with("ZarrV3")); CHECK(Device_Ok == driver_close_device(device)); } diff --git a/tests/write-zarr-raw-multiscale.cpp b/tests/write-zarr-raw-multiscale.cpp index 42115e11..e7e891f0 100644 --- a/tests/write-zarr-raw-multiscale.cpp +++ b/tests/write-zarr-raw-multiscale.cpp @@ -97,7 +97,7 @@ acquire(AcquireRuntime* runtime, const char* filename) DEVOK(device_manager_select(dm, DeviceKind_Camera, - SIZED("simulated.*radial.*"), + SIZED("simulated.*empty.*"), &props.video[0].camera.identifier)); DEVOK(device_manager_select(dm, DeviceKind_Storage, @@ -129,7 +129,7 @@ acquire(AcquireRuntime* runtime, const char* filename) props.video[0].camera.settings.pixel_type = SampleType_u8; props.video[0].camera.settings.shape = { .x = frame_width, .y = frame_height }; - props.video[0].camera.settings.exposure_time_us = 1e5; +// props.video[0].camera.settings.exposure_time_us = 1e5; props.video[0].max_frame_count = max_frames; OK(acquire_configure(runtime, &props)); diff --git a/tests/write-zarr-raw-with-ragged-tiling.cpp b/tests/write-zarr-raw-with-ragged-tiling.cpp index c2981280..c6f87a21 100644 --- a/tests/write-zarr-raw-with-ragged-tiling.cpp +++ b/tests/write-zarr-raw-with-ragged-tiling.cpp @@ -88,7 +88,7 @@ acquire(AcquireRuntime* runtime, const char* 
filename) DEVOK(device_manager_select(dm, DeviceKind_Camera, - SIZED("simulated.*radial.*"), + SIZED("simulated.*empty.*"), &props.video[0].camera.identifier)); DEVOK(device_manager_select(dm, DeviceKind_Storage, @@ -117,8 +117,6 @@ acquire(AcquireRuntime* runtime, const char* filename) props.video[0].camera.settings.pixel_type = SampleType_u8; props.video[0].camera.settings.shape = { .x = frame_width, .y = frame_height }; - // we may drop frames with lower exposure - props.video[0].camera.settings.exposure_time_us = 1e4; props.video[0].max_frame_count = max_frame_count; OK(acquire_configure(runtime, &props)); diff --git a/tests/write-zarr-v3-compressed.cpp b/tests/write-zarr-v3-compressed.cpp new file mode 100644 index 00000000..136a3c40 --- /dev/null +++ b/tests/write-zarr-v3-compressed.cpp @@ -0,0 +1,299 @@ +/// @brief Test the basic Zarr v3 writer. +/// @details Ensure that chunking is working as expected and metadata is written +/// correctly. + +#include "device/hal/device.manager.h" +#include "acquire.h" +#include "platform.h" // clock +#include "logger.h" + +#include +#include +#include + +#include "json.hpp" + +namespace fs = std::filesystem; +using json = nlohmann::json; + +void +reporter(int is_error, + const char* file, + int line, + const char* function, + const char* msg) +{ + fprintf(is_error ? stderr : stdout, + "%s%s(%d) - %s: %s\n", + is_error ? "ERROR " : "", + file, + line, + function, + msg); +} + +/// Helper for passing size static strings as function args. +/// For a function: `f(char*,size_t)` use `f(SIZED("hello"))`. +/// Expands to `f("hello",5)`. +#define SIZED(str) str, sizeof(str) - 1 + +#define L (aq_logger) +#define LOG(...) L(0, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__) +#define ERR(...) L(1, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__) +#define EXPECT(e, ...) 
\ + do { \ + if (!(e)) { \ + char buf[1 << 8] = { 0 }; \ + ERR(__VA_ARGS__); \ + snprintf(buf, sizeof(buf) - 1, __VA_ARGS__); \ + throw std::runtime_error(buf); \ + } \ + } while (0) +#define CHECK(e) EXPECT(e, "Expression evaluated as false: %s", #e) +#define DEVOK(e) CHECK(Device_Ok == (e)) +#define OK(e) CHECK(AcquireStatus_Ok == (e)) + +/// example: `ASSERT_EQ(int,"%d",42,meaning_of_life())` +#define ASSERT_EQ(T, fmt, a, b) \ + do { \ + T a_ = (T)(a); \ + T b_ = (T)(b); \ + EXPECT(a_ == b_, "Expected %s==%s but " fmt "!=" fmt, #a, #b, a_, b_); \ + } while (0) + +/// Check that a>b +/// example: `ASSERT_GT(int,"%d",43,meaning_of_life())` +#define ASSERT_GT(T, fmt, a, b) \ + do { \ + T a_ = (T)(a); \ + T b_ = (T)(b); \ + EXPECT( \ + a_ > b_, "Expected (%s) > (%s) but " fmt "<=" fmt, #a, #b, a_, b_); \ + } while (0) + +const static uint32_t frame_width = 1920; +const static uint32_t tile_width = frame_width / 4; +const static uint32_t frame_height = 1080; +const static uint32_t tile_height = frame_height / 3; +const static uint32_t expected_frames_per_chunk = 48; +const static uint32_t max_frame_count = 48; + +void +setup(AcquireRuntime* runtime) +{ + const char* filename = TEST ".zarr"; + auto dm = acquire_device_manager(runtime); + CHECK(runtime); + CHECK(dm); + + AcquireProperties props = {}; + OK(acquire_get_configuration(runtime, &props)); + + DEVOK(device_manager_select(dm, + DeviceKind_Camera, + SIZED("simulated.*empty.*"), + &props.video[0].camera.identifier)); + DEVOK(device_manager_select(dm, + DeviceKind_Storage, + SIZED("ZarrV3Blosc1ZstdByteShuffle"), + &props.video[0].storage.identifier)); + + const char external_metadata[] = R"({"hello":"world"})"; + const struct PixelScale sample_spacing_um = { 1, 1 }; + + storage_properties_init(&props.video[0].storage.settings, + 0, + (char*)filename, + strlen(filename) + 1, + (char*)external_metadata, + sizeof(external_metadata), + sample_spacing_um); + + storage_properties_set_chunking_props( + &props.video[0].storage.settings, tile_width, tile_height, 1, 16 << 20); + + props.video[0].camera.settings.binning = 1; + props.video[0].camera.settings.pixel_type = SampleType_u8; + props.video[0].camera.settings.shape = { .x = frame_width, + .y = frame_height }; + props.video[0].max_frame_count = max_frame_count; + + OK(acquire_configure(runtime, &props)); +} + +void +acquire(AcquireRuntime* runtime) +{ + const auto next = [](VideoFrame* cur) -> VideoFrame* { + return (VideoFrame*)(((uint8_t*)cur) + cur->bytes_of_frame); + }; + + const auto consumed_bytes = [](const VideoFrame* const cur, + const VideoFrame* const end) -> size_t { + return (uint8_t*)end - (uint8_t*)cur; + }; + + struct clock clock; + static double time_limit_ms = 20000.0; + clock_init(&clock); + clock_shift_ms(&clock, time_limit_ms); + OK(acquire_start(runtime)); + { + uint64_t nframes = 0; + VideoFrame *beg, *end, *cur; + do { + struct clock throttle; + clock_init(&throttle); +// EXPECT(clock_cmp_now(&clock) < 0, +// "Timeout at %f ms", +// clock_toc_ms(&clock) + time_limit_ms); + OK(acquire_map_read(runtime, 0, &beg, &end)); + for (cur = beg; cur < end; cur = next(cur)) { + LOG("stream %d counting frame w id %d", 0, cur->frame_id); + CHECK(cur->shape.dims.width == frame_width); + CHECK(cur->shape.dims.height == frame_height); + ++nframes; + } + { + uint32_t n = consumed_bytes(beg, end); + OK(acquire_unmap_read(runtime, 0, n)); + if (n) + LOG("stream %d consumed bytes %d", 0, n); + } + clock_sleep_ms(&throttle, 100.0f); + + LOG( + "stream %d nframes %d time %f", 0, nframes, 
clock_toc_ms(&clock)); + } while (DeviceState_Running == acquire_get_state(runtime) && + nframes < max_frame_count); + + OK(acquire_map_read(runtime, 0, &beg, &end)); + for (cur = beg; cur < end; cur = next(cur)) { + LOG("stream %d counting frame w id %d", 0, cur->frame_id); + CHECK(cur->shape.dims.width == frame_width); + CHECK(cur->shape.dims.height == frame_height); + ++nframes; + } + { + uint32_t n = consumed_bytes(beg, end); + OK(acquire_unmap_read(runtime, 0, n)); + if (n) + LOG("stream %d consumed bytes %d", 0, n); + } + + CHECK(nframes == max_frame_count); + } + + OK(acquire_stop(runtime)); +} + +void +validate(AcquireRuntime* runtime) +{ + const fs::path test_path(TEST ".zarr"); + CHECK(fs::is_directory(test_path)); + + // check the zarr.json metadata file + fs::path metadata_path = test_path / "zarr.json"; + CHECK(fs::is_regular_file(metadata_path)); + std::ifstream f(metadata_path); + json metadata = json::parse(f); + + CHECK(metadata["extensions"].empty()); + CHECK("https://purl.org/zarr/spec/protocol/core/3.0" == + metadata["metadata_encoding"]); + CHECK(".json" == metadata["metadata_key_suffix"]); + CHECK("https://purl.org/zarr/spec/protocol/core/3.0" == + metadata["zarr_format"]); + + // check the group metadata file + metadata_path = test_path / "meta" / "root.group.json"; + CHECK(fs::is_regular_file(metadata_path)); + + f = std::ifstream(metadata_path); + metadata = json::parse(f); + CHECK("world" == metadata["attributes"]["acquire"]["hello"]); + + // check the array metadata file + metadata_path = test_path / "meta" / "root" / "0.array.json"; + CHECK(fs::is_regular_file(metadata_path)); + + f = std::ifstream(metadata_path); + metadata = json::parse(f); + + const auto chunk_grid = metadata["chunk_grid"]; + CHECK("/" == chunk_grid["separator"]); + CHECK("regular" == chunk_grid["type"]); + + const auto chunk_shape = chunk_grid["chunk_shape"]; + ASSERT_EQ(int, "%d", expected_frames_per_chunk, chunk_shape[0]); + ASSERT_EQ(int, "%d", 1, chunk_shape[1]); + ASSERT_EQ(int, "%d", tile_height, chunk_shape[2]); + ASSERT_EQ(int, "%d", tile_width, chunk_shape[3]); + + CHECK("C" == metadata["chunk_memory_layout"]); + CHECK("u1" == metadata["data_type"]); + CHECK(metadata["extensions"].empty()); + + const auto array_shape = metadata["shape"]; + ASSERT_EQ(int, "%d", max_frame_count, array_shape[0]); + ASSERT_EQ(int, "%d", 1, array_shape[1]); + ASSERT_EQ(int, "%d", frame_height, array_shape[2]); + ASSERT_EQ(int, "%d", frame_width, array_shape[3]); + + const auto compressor = metadata["compressor"]; + CHECK("https://purl.org/zarr/spec/codec/blosc/1.0" == compressor["codec"]); + + auto configuration = compressor["configuration"]; + ASSERT_EQ(int, "%d", 0, configuration["blocksize"]); + ASSERT_EQ(int, "%d", 1, configuration["clevel"]); + ASSERT_EQ(int, "%d", 1, configuration["shuffle"]); + CHECK("zstd" == configuration["cname"]); + + // sharding + const auto storage_transformers = metadata["storage_transformers"]; + configuration = storage_transformers[0]["configuration"]; + const auto& cps = configuration["chunks_per_shard"]; + ASSERT_EQ(int, "%d", 1, cps[0]); + ASSERT_EQ(int, "%d", 1, cps[1]); + ASSERT_EQ(int, "%d", 3, cps[2]); + ASSERT_EQ(int, "%d", 4, cps[3]); + const size_t chunks_per_shard = cps[0].get() * + cps[1].get() * + cps[2].get() * cps[3].get(); + + // check that each chunked data file is the expected size + uint32_t bytes_per_chunk = + chunk_shape[0].get() * chunk_shape[1].get() * + chunk_shape[2].get() * chunk_shape[3].get(); + for (auto t = 0; t < std::ceil(max_frame_count / 
expected_frames_per_chunk); + ++t) { + fs::path path = test_path / "data" / "root" / "0" / + ("c" + std::to_string(t)) / "0" / "0" / "0"; + CHECK(fs::is_regular_file(path)); + + auto file_size = fs::file_size(path); + ASSERT_GT(int, "%d", file_size, 0); + ASSERT_GT(int, "%d", chunks_per_shard* bytes_per_chunk, file_size); + } +} + +void +teardown(AcquireRuntime* runtime) +{ + LOG("Done (OK)"); + acquire_shutdown(runtime); +} + +int +main() +{ + auto runtime = acquire_init(reporter); + + setup(runtime); + acquire(runtime); + validate(runtime); + teardown(runtime); + + return 0; +} diff --git a/tests/write-zarr-v3-raw.cpp b/tests/write-zarr-v3-raw.cpp new file mode 100644 index 00000000..404efa51 --- /dev/null +++ b/tests/write-zarr-v3-raw.cpp @@ -0,0 +1,294 @@ +/// @brief Test the basic Zarr v3 writer. +/// @details Ensure that chunking is working as expected and metadata is written +/// correctly. + +#include "device/hal/device.manager.h" +#include "acquire.h" +#include "platform.h" // clock +#include "logger.h" + +#include +#include +#include + +#include "json.hpp" + +namespace fs = std::filesystem; +using json = nlohmann::json; + +void +reporter(int is_error, + const char* file, + int line, + const char* function, + const char* msg) +{ + fprintf(is_error ? stderr : stdout, + "%s%s(%d) - %s: %s\n", + is_error ? "ERROR " : "", + file, + line, + function, + msg); +} + +/// Helper for passing size static strings as function args. +/// For a function: `f(char*,size_t)` use `f(SIZED("hello"))`. +/// Expands to `f("hello",5)`. +#define SIZED(str) str, sizeof(str) - 1 + +#define L (aq_logger) +#define LOG(...) L(0, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__) +#define ERR(...) L(1, __FILE__, __LINE__, __FUNCTION__, __VA_ARGS__) +#define EXPECT(e, ...) \ + do { \ + if (!(e)) { \ + char buf[1 << 8] = { 0 }; \ + ERR(__VA_ARGS__); \ + snprintf(buf, sizeof(buf) - 1, __VA_ARGS__); \ + throw std::runtime_error(buf); \ + } \ + } while (0) +#define CHECK(e) EXPECT(e, "Expression evaluated as false: %s", #e) +#define DEVOK(e) CHECK(Device_Ok == (e)) +#define OK(e) CHECK(AcquireStatus_Ok == (e)) + +/// example: `ASSERT_EQ(int,"%d",42,meaning_of_life())` +#define ASSERT_EQ(T, fmt, a, b) \ + do { \ + T a_ = (T)(a); \ + T b_ = (T)(b); \ + EXPECT(a_ == b_, "Expected %s==%s but " fmt "!=" fmt, #a, #b, a_, b_); \ + } while (0) + +/// Check that a>b +/// example: `ASSERT_GT(int,"%d",43,meaning_of_life())` +#define ASSERT_GT(T, fmt, a, b) \ + do { \ + T a_ = (T)(a); \ + T b_ = (T)(b); \ + EXPECT( \ + a_ > b_, "Expected (%s) > (%s) but " fmt "<=" fmt, #a, #b, a_, b_); \ + } while (0) + +const static uint32_t frame_width = 1080; +const static uint32_t tile_width = frame_width / 4; +const static uint32_t frame_height = 960; +const static uint32_t tile_height = frame_height / 3; +const static uint32_t expected_frames_per_chunk = 48; +const static uint32_t max_frame_count = 48; + +void +setup(AcquireRuntime* runtime) +{ + const char* filename = TEST ".zarr"; + auto dm = acquire_device_manager(runtime); + CHECK(runtime); + CHECK(dm); + + AcquireProperties props = {}; + OK(acquire_get_configuration(runtime, &props)); + + DEVOK(device_manager_select(dm, + DeviceKind_Camera, + SIZED("simulated.*empty.*"), + &props.video[0].camera.identifier)); + DEVOK(device_manager_select(dm, + DeviceKind_Storage, + SIZED("ZarrV3"), + &props.video[0].storage.identifier)); + + const char external_metadata[] = R"({"hello":"world"})"; + const struct PixelScale sample_spacing_um = { 1, 1 }; + + 
storage_properties_init(&props.video[0].storage.settings, + 0, + (char*)filename, + strlen(filename) + 1, + (char*)external_metadata, + sizeof(external_metadata), + sample_spacing_um); + + storage_properties_set_chunking_props( + &props.video[0].storage.settings, tile_width, tile_height, 1, 16 << 20); + + props.video[0].camera.settings.binning = 1; + props.video[0].camera.settings.pixel_type = SampleType_u8; + props.video[0].camera.settings.shape = { .x = frame_width, + .y = frame_height }; + props.video[0].max_frame_count = max_frame_count; + + OK(acquire_configure(runtime, &props)); +} + +void +acquire(AcquireRuntime* runtime) +{ + const auto next = [](VideoFrame* cur) -> VideoFrame* { + return (VideoFrame*)(((uint8_t*)cur) + cur->bytes_of_frame); + }; + + const auto consumed_bytes = [](const VideoFrame* const cur, + const VideoFrame* const end) -> size_t { + return (uint8_t*)end - (uint8_t*)cur; + }; + + struct clock clock; + static double time_limit_ms = 20000.0; + clock_init(&clock); + clock_shift_ms(&clock, time_limit_ms); + OK(acquire_start(runtime)); + { + uint64_t nframes = 0; + VideoFrame *beg, *end, *cur; + do { + struct clock throttle; + clock_init(&throttle); + // EXPECT(clock_cmp_now(&clock) < 0, + // "Timeout at %f ms", + // clock_toc_ms(&clock) + time_limit_ms); + OK(acquire_map_read(runtime, 0, &beg, &end)); + for (cur = beg; cur < end; cur = next(cur)) { + LOG("stream %d counting frame w id %d", 0, cur->frame_id); + CHECK(cur->shape.dims.width == frame_width); + CHECK(cur->shape.dims.height == frame_height); + ++nframes; + } + { + uint32_t n = consumed_bytes(beg, end); + OK(acquire_unmap_read(runtime, 0, n)); + if (n) + LOG("stream %d consumed bytes %d", 0, n); + } + clock_sleep_ms(&throttle, 100.0f); + + LOG( + "stream %d nframes %d time %f", 0, nframes, clock_toc_ms(&clock)); + } while (DeviceState_Running == acquire_get_state(runtime) && + nframes < max_frame_count); + + OK(acquire_map_read(runtime, 0, &beg, &end)); + for (cur = beg; cur < end; cur = next(cur)) { + LOG("stream %d counting frame w id %d", 0, cur->frame_id); + CHECK(cur->shape.dims.width == frame_width); + CHECK(cur->shape.dims.height == frame_height); + ++nframes; + } + { + uint32_t n = consumed_bytes(beg, end); + OK(acquire_unmap_read(runtime, 0, n)); + if (n) + LOG("stream %d consumed bytes %d", 0, n); + } + + CHECK(nframes == max_frame_count); + } + + OK(acquire_stop(runtime)); +} + +void +validate(AcquireRuntime* runtime) +{ + const fs::path test_path(TEST ".zarr"); + CHECK(fs::is_directory(test_path)); + + // check the zarr.json metadata file + fs::path metadata_path = test_path / "zarr.json"; + CHECK(fs::is_regular_file(metadata_path)); + std::ifstream f(metadata_path); + json metadata = json::parse(f); + + CHECK(metadata["extensions"].empty()); + CHECK("https://purl.org/zarr/spec/protocol/core/3.0" == + metadata["metadata_encoding"]); + CHECK(".json" == metadata["metadata_key_suffix"]); + CHECK("https://purl.org/zarr/spec/protocol/core/3.0" == + metadata["zarr_format"]); + + // check the group metadata file + metadata_path = test_path / "meta" / "root.group.json"; + CHECK(fs::is_regular_file(metadata_path)); + + f = std::ifstream(metadata_path); + metadata = json::parse(f); + CHECK("world" == metadata["attributes"]["acquire"]["hello"]); + + // check the array metadata file + metadata_path = test_path / "meta" / "root" / "0.array.json"; + CHECK(fs::is_regular_file(metadata_path)); + + f = std::ifstream(metadata_path); + metadata = json::parse(f); + + const auto chunk_grid = metadata["chunk_grid"]; 
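Both v3 tests pin `expected_frames_per_chunk` to 48, the same as `max_frame_count`. That follows from `write_array_metadata_()` above, which records the smaller of the frames actually written and the frames that fit under `max_bytes_per_chunk`; with the 16 MiB cap set in `setup()`, far more than 48 tiles fit, so the frame count wins. A rough check, assuming `common::frames_per_chunk()` simply divides the byte budget by the tile size (its exact rounding is defined in common.hh, not shown in this diff):

```cpp
// Rough check of expected_frames_per_chunk for write-zarr-v3-raw (u8 pixels).
// Assumes common::frames_per_chunk() ~= max_bytes_per_chunk / bytes_per_tile.
#include <algorithm>
#include <cstdint>
#include <cstdio>

int
main()
{
    const uint64_t tile_width = 1080 / 4;                     // 270
    const uint64_t tile_height = 960 / 3;                     // 320
    const uint64_t bytes_per_tile = tile_width * tile_height; // 86,400 for u8

    const uint64_t max_bytes_per_chunk = 16 << 20; // 16 MiB, as set in setup()
    const uint64_t frames_that_fit = max_bytes_per_chunk / bytes_per_tile; // 194

    const uint64_t frame_count = 48; // max_frame_count in the test

    // The .array.json "chunk_shape" records the smaller of the two, which is
    // why expected_frames_per_chunk == max_frame_count == 48 here.
    printf("frames per chunk: %llu\n",
           (unsigned long long)std::min(frame_count, frames_that_fit));
    return 0;
}
```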
+ CHECK("/" == chunk_grid["separator"]); + CHECK("regular" == chunk_grid["type"]); + + const auto chunk_shape = chunk_grid["chunk_shape"]; + ASSERT_EQ(int, "%d", expected_frames_per_chunk, chunk_shape[0]); + ASSERT_EQ(int, "%d", 1, chunk_shape[1]); + ASSERT_EQ(int, "%d", tile_height, chunk_shape[2]); + ASSERT_EQ(int, "%d", tile_width, chunk_shape[3]); + + CHECK("C" == metadata["chunk_memory_layout"]); + CHECK("u1" == metadata["data_type"]); + CHECK(metadata["extensions"].empty()); + + const auto array_shape = metadata["shape"]; + ASSERT_EQ(int, "%d", max_frame_count, array_shape[0]); + ASSERT_EQ(int, "%d", 1, array_shape[1]); + ASSERT_EQ(int, "%d", frame_height, array_shape[2]); + ASSERT_EQ(int, "%d", frame_width, array_shape[3]); + + // sharding + const auto storage_transformers = metadata["storage_transformers"]; + const auto configuration = storage_transformers[0]["configuration"]; + const auto& cps = configuration["chunks_per_shard"]; + ASSERT_EQ(int, "%d", 1, cps[0]); + ASSERT_EQ(int, "%d", 1, cps[1]); + ASSERT_EQ(int, "%d", 3, cps[2]); + ASSERT_EQ(int, "%d", 4, cps[3]); + const size_t chunks_per_shard = cps[0].get() * + cps[1].get() * + cps[2].get() * cps[3].get(); + + const auto index_size = 2 * chunks_per_shard * sizeof(uint64_t); + + // check that each chunked data file is the expected size + const uint32_t bytes_per_chunk = + chunk_shape[0].get() * chunk_shape[1].get() * + chunk_shape[2].get() * chunk_shape[3].get(); + for (auto t = 0; t < std::ceil(max_frame_count / expected_frames_per_chunk); + ++t) { + fs::path path = test_path / "data" / "root" / "0" / + ("c" + std::to_string(t)) / "0" / "0" / "0"; + + CHECK(fs::is_regular_file(path)); + + auto file_size = fs::file_size(path); + + ASSERT_EQ( + int, "%d", chunks_per_shard* bytes_per_chunk + index_size, file_size); + } +} + +void +teardown(AcquireRuntime* runtime) +{ + LOG("Done (OK)"); + acquire_shutdown(runtime); +} + +int +main() +{ + auto runtime = acquire_init(reporter); + + setup(runtime); + acquire(runtime); + validate(runtime); + teardown(runtime); + + return 0; +} diff --git a/tests/write-zarr-with-defaults.cpp b/tests/write-zarr-with-defaults.cpp index bf05bd64..159e086c 100644 --- a/tests/write-zarr-with-defaults.cpp +++ b/tests/write-zarr-with-defaults.cpp @@ -154,8 +154,10 @@ acquire(AcquireRuntime* runtime, const char* filename) } clock_sleep_ms(&throttle, 100.0f); - LOG( - "stream %d expected_frames_per_chunk %d time %f", 0, nframes, clock_toc_ms(&clock)); + LOG("stream %d expected_frames_per_chunk %d time %f", + 0, + nframes, + clock_toc_ms(&clock)); } while (DeviceState_Running == acquire_get_state(runtime) && nframes < props.video[0].max_frame_count);