From 7d6dd619990bab2504f4d9f6ad9df7894cbcdabc Mon Sep 17 00:00:00 2001 From: Alan Liddell Date: Mon, 14 Oct 2024 05:46:07 -0700 Subject: [PATCH] Add Python tests (1) (#11) * bindings created * pip install . working * python -m build working (on Windows anyway) * get it building on linux * Remove test stub (save for the next PR) * Undo an overzealous rename * Add Python wheel build job * Prepare for Python bindings * Don't export the enum values into the module base namespace. * (wip) some basic tests * Revert CMake minimum version and use cmake_policy. Using builtin `BUILD_TESTING` cmake option. * Update build.yml * Update release.yml * some simple tests * add python tests to CI --- .github/workflows/{test_pr.yml => test.yml} | 54 +++- pyproject.toml | 3 +- python/CMakeLists.txt | 1 - python/acquire-zarr-py.cpp | 330 ++++++++++++++++---- python/tests/test_settings.py | 130 ++++++++ 5 files changed, 452 insertions(+), 66 deletions(-) rename .github/workflows/{test_pr.yml => test.yml} (79%) create mode 100644 python/tests/test_settings.py diff --git a/.github/workflows/test_pr.yml b/.github/workflows/test.yml similarity index 79% rename from .github/workflows/test_pr.yml rename to .github/workflows/test.yml index 395527a..1704f4f 100644 --- a/.github/workflows/test_pr.yml +++ b/.github/workflows/test.yml @@ -88,11 +88,6 @@ jobs: submodules: true ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python 3.10 - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - name: Install minio and mcli run: | apt update && apt install -y tmux wget @@ -137,4 +132,51 @@ jobs: ZARR_S3_BUCKET_NAME: ${{ env.MINIO_BUCKET }} ZARR_S3_ACCESS_KEY_ID: ${{ env.MINIO_ACCESS_KEY }} ZARR_S3_SECRET_ACCESS_KEY: ${{ env.MINIO_SECRET_KEY }} - run: ctest -C ${{env.BUILD_TYPE}} -L s3 --output-on-failure \ No newline at end of file + run: ctest -C ${{env.BUILD_TYPE}} -L s3 --output-on-failure + + test_python: + name: Test on ${{ matrix.platform }} + runs-on: ${{ matrix.platform }} + timeout-minutes: 20 + strategy: + fail-fast: false + matrix: + platform: + - "ubuntu-latest" + - "windows-latest" + - "macos-latest" + + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.10.0 + with: + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: true + ref: ${{ github.event.pull_request.head.sha }} + + - name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install vcpkg + run: | + git clone https://github.com/microsoft/vcpkg.git + cd vcpkg && ./bootstrap-vcpkg.sh + echo "VCPKG_ROOT=${{github.workspace}}/vcpkg" >> $GITHUB_ENV + echo "${{github.workspace}}/vcpkg" >> $GITHUB_PATH + ./vcpkg integrate install + shell: bash + + - name: Install dependencies + run: python -m pip install -U pip "pybind11[global]" cmake build numpy pytest + + - name: Build and install Python bindings + run: python -m pip install . + + - name: Run tests + run: python -m pytest -v + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b47670c..7c5060c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,8 @@ build-backend = "setuptools.build_meta" [tool.pytest.ini_options] minversion = "6.0" -addopts = "-ra -q" +addopts = "-ra -q --color=yes" +log_cli = true # when true, messages are printed immediately testpaths = [ "python/tests", ] \ No newline at end of file diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 30c02aa..b353c55 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,4 @@ project(acquire-zarr-py) -cmake_policy(SET CMP0057 NEW) execute_process(COMMAND python3 -m pybind11 --cmakedir RESULT_VARIABLE pybind11_NOT_FOUND diff --git a/python/acquire-zarr-py.cpp b/python/acquire-zarr-py.cpp index 5635cc8..ae78506 100644 --- a/python/acquire-zarr-py.cpp +++ b/python/acquire-zarr-py.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "acquire.zarr.h" @@ -14,6 +15,81 @@ auto ZarrStreamDeleter = [](ZarrStream_s* stream) { ZarrStream_destroy(stream); } }; + +const char* +data_type_to_str(ZarrDataType t) +{ + switch (t) { + case ZarrDataType_uint8: + return "UINT8"; + case ZarrDataType_uint16: + return "UINT16"; + case ZarrDataType_uint32: + return "UINT32"; + case ZarrDataType_uint64: + return "UINT64"; + case ZarrDataType_int8: + return "INT8"; + case ZarrDataType_int16: + return "INT16"; + case ZarrDataType_int32: + return "INT32"; + case ZarrDataType_int64: + return "INT64"; + case ZarrDataType_float32: + return "FLOAT32"; + case ZarrDataType_float64: + return "FLOAT64"; + default: + return "UNKNOWN"; + } +} + +const char* +compressor_to_str(ZarrCompressor c) +{ + switch (c) { + case ZarrCompressor_None: + return "NONE"; + case ZarrCompressor_Blosc1: + return "BLOSC1"; + default: + return "UNKNOWN"; + } +} + +const char* +compression_codec_to_str(ZarrCompressionCodec c) +{ + switch (c) { + case ZarrCompressionCodec_None: + return "NONE"; + case ZarrCompressionCodec_BloscLZ4: + return "BLOSC_LZ4"; + case ZarrCompressionCodec_BloscZstd: + return "BLOSC_ZSTD"; + default: + return "UNKNOWN"; + } +} + +const char* +dimension_type_to_str(ZarrDimensionType t) +{ + switch (t) { + case ZarrDimensionType_Space: + return "SPACE"; + case ZarrDimensionType_Channel: + return "CHANNEL"; + case ZarrDimensionType_Time: + return "TIME"; + case ZarrDimensionType_Other: + return "OTHER"; + default: + return "UNKNOWN"; + } +} + } // namespace class PyZarrS3Settings @@ -40,6 +116,17 @@ class PyZarrS3Settings } const std::string& secret_access_key() const { return secret_access_key_; } + std::string repr() const + { + auto secret_access_key = secret_access_key_.size() < 6 + ? secret_access_key_ + : secret_access_key_.substr(0, 5) + "..."; + + return "S3Settings(endpoint='" + endpoint_ + "', bucket_name='" + + bucket_name_ + "', access_key_id='" + access_key_id_ + + "', secret_access_key='" + secret_access_key + "')"; + } + private: std::string endpoint_; std::string bucket_name_; @@ -65,6 +152,16 @@ class PyZarrCompressionSettings uint8_t shuffle() const { return shuffle_; } void set_shuffle(uint8_t shuffle) { shuffle_ = shuffle; } + std::string repr() const + { + return "CompressionSettings(compressor=Compressor." + + std::string(compressor_to_str(compressor_)) + + ", codec=CompressionCodec." + + std::string(compression_codec_to_str(codec_)) + + ", level=" + std::to_string(level_) + + ", shuffle=" + std::to_string(shuffle_) + ")"; + } + private: ZarrCompressor compressor_; ZarrCompressionCodec codec_; @@ -93,20 +190,34 @@ class PyZarrDimensionProperties uint32_t shard_size_chunks() const { return shard_size_chunks_; } void set_shard_size_chunks(uint32_t size) { shard_size_chunks_ = size; } + std::string repr() const + { + return "Dimension(name='" + name_ + "', kind=DimensionType." + + std::string(dimension_type_to_str(type_)) + + ", array_size_px=" + std::to_string(array_size_px_) + + ", chunk_size_px=" + std::to_string(chunk_size_px_) + + ", shard_size_chunks=" + std::to_string(shard_size_chunks_) + + ")"; + } + private: std::string name_; - ZarrDimensionType type_; - uint32_t array_size_px_; - uint32_t chunk_size_px_; - uint32_t shard_size_chunks_; + ZarrDimensionType type_{ ZarrDimensionType_Space }; + uint32_t array_size_px_{ 0 }; + uint32_t chunk_size_px_{ 0 }; + uint32_t shard_size_chunks_{ 0 }; }; +PYBIND11_MAKE_OPAQUE(std::vector); + class PyZarrStreamSettings { public: PyZarrStreamSettings() = default; ~PyZarrStreamSettings() = default; + std::vector dimensions; + std::string store_path() const { return store_path_; } void set_store_path(const std::string& path) { store_path_ = path; } @@ -135,15 +246,6 @@ class PyZarrStreamSettings compression_settings_ = settings; } - std::vector dimensions() const - { - return dimensions_; - } - void set_dimensions(const std::vector& dims) - { - dimensions_ = dims; - } - bool multiscale() const { return multiscale_; } void set_multiscale(bool multiscale) { multiscale_ = multiscale; } @@ -158,10 +260,9 @@ class PyZarrStreamSettings std::optional custom_metadata_; std::optional s3_settings_; std::optional compression_settings_; - std::vector dimensions_; bool multiscale_ = false; - ZarrDataType data_type_; - ZarrVersion version_; + ZarrDataType data_type_{ ZarrDataType_uint8 }; + ZarrVersion version_{ ZarrVersion_2 }; }; class PyZarrStream @@ -211,7 +312,7 @@ class PyZarrStream stream_settings.compression_settings = &compression_settings; } - const auto& dims = settings.dimensions(); + const auto& dims = settings.dimensions; std::vector dimension_props; std::vector dimension_names(dims.size()); @@ -280,6 +381,8 @@ class PyZarrStream PYBIND11_MODULE(acquire_zarr, m) { + using namespace pybind11::literals; + m.doc() = R"pbdoc( Acquire Zarr Writer Python API ----------------------- @@ -289,44 +392,64 @@ PYBIND11_MODULE(acquire_zarr, m) append )pbdoc"; - py::enum_(m, "Version") + py::bind_vector>(m, + "VectorDimension"); + + py::enum_(m, "ZarrVersion") .value("V2", ZarrVersion_2) - .value("V3", ZarrVersion_3) - .export_values(); - - py::enum_(m, "DType") - .value("DTYPE_UINT8", ZarrDataType_uint8) - .value("DTYPE_UINT16", ZarrDataType_uint16) - .value("DTYPE_UINT32", ZarrDataType_uint32) - .value("DTYPE_UINT64", ZarrDataType_uint64) - .value("DTYPE_INT8", ZarrDataType_int8) - .value("DTYPE_INT16", ZarrDataType_int16) - .value("DTYPE_INT32", ZarrDataType_int32) - .value("DTYPE_INT64", ZarrDataType_int64) - .value("DTYPE_FLOAT32", ZarrDataType_float32) - .value("DTYPE_FLOAT64", ZarrDataType_float64) - .export_values(); + .value("V3", ZarrVersion_3); + + py::enum_(m, "DataType") + .value(data_type_to_str(ZarrDataType_uint8), ZarrDataType_uint8) + .value(data_type_to_str(ZarrDataType_uint16), ZarrDataType_uint16) + .value(data_type_to_str(ZarrDataType_uint32), ZarrDataType_uint32) + .value(data_type_to_str(ZarrDataType_uint64), ZarrDataType_uint64) + .value(data_type_to_str(ZarrDataType_int8), ZarrDataType_int8) + .value(data_type_to_str(ZarrDataType_int16), ZarrDataType_int16) + .value(data_type_to_str(ZarrDataType_int32), ZarrDataType_int32) + .value(data_type_to_str(ZarrDataType_int64), ZarrDataType_int64) + .value(data_type_to_str(ZarrDataType_float32), ZarrDataType_float32) + .value(data_type_to_str(ZarrDataType_float64), ZarrDataType_float64); py::enum_(m, "Compressor") - .value("COMPRESSOR_NONE", ZarrCompressor_None) - .value("COMPRESSOR_BLOSC1", ZarrCompressor_Blosc1) - .export_values(); + .value(compressor_to_str(ZarrCompressor_None), ZarrCompressor_None) + .value(compressor_to_str(ZarrCompressor_Blosc1), ZarrCompressor_Blosc1); py::enum_(m, "CompressionCodec") - .value("COMPRESSION_NONE", ZarrCompressionCodec_None) - .value("COMPRESSION_BLOSC_LZ4", ZarrCompressionCodec_BloscLZ4) - .value("COMPRESSION_BLOSC_ZSTD", ZarrCompressionCodec_BloscZstd) - .export_values(); + .value(compression_codec_to_str(ZarrCompressionCodec_None), + ZarrCompressionCodec_None) + .value(compression_codec_to_str(ZarrCompressionCodec_BloscLZ4), + ZarrCompressionCodec_BloscLZ4) + .value(compression_codec_to_str(ZarrCompressionCodec_BloscZstd), + ZarrCompressionCodec_BloscZstd); py::enum_(m, "DimensionType") - .value("DIMENSION_TYPE_SPACE", ZarrDimensionType_Space) - .value("DIMENSION_TYPE_CHANNEL", ZarrDimensionType_Channel) - .value("DIMENSION_TYPE_TIME", ZarrDimensionType_Time) - .value("DIMENSION_TYPE_OTHER", ZarrDimensionType_Other) - .export_values(); - - py::class_(m, "S3Settings") - .def(py::init<>()) + .value(dimension_type_to_str(ZarrDimensionType_Space), + ZarrDimensionType_Space) + .value(dimension_type_to_str(ZarrDimensionType_Channel), + ZarrDimensionType_Channel) + .value(dimension_type_to_str(ZarrDimensionType_Time), + ZarrDimensionType_Time) + .value(dimension_type_to_str(ZarrDimensionType_Other), + ZarrDimensionType_Other); + + py::class_(m, "S3Settings", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrS3Settings settings; + if (kwargs.contains("endpoint")) + settings.set_endpoint(kwargs["endpoint"].cast()); + if (kwargs.contains("bucket_name")) + settings.set_bucket_name( + kwargs["bucket_name"].cast()); + if (kwargs.contains("access_key_id")) + settings.set_access_key_id( + kwargs["access_key_id"].cast()); + if (kwargs.contains("secret_access_key")) + settings.set_secret_access_key( + kwargs["secret_access_key"].cast()); + return settings; + })) + .def("__repr__", [](const PyZarrS3Settings& self) { return self.repr(); }) .def_property("endpoint", &PyZarrS3Settings::endpoint, &PyZarrS3Settings::set_endpoint) @@ -340,8 +463,23 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrS3Settings::secret_access_key, &PyZarrS3Settings::set_secret_access_key); - py::class_(m, "ZarrCompressionSettings") - .def(py::init<>()) + py::class_( + m, "CompressionSettings", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrCompressionSettings settings; + if (kwargs.contains("compressor")) + settings.set_compressor( + kwargs["compressor"].cast()); + if (kwargs.contains("codec")) + settings.set_codec(kwargs["codec"].cast()); + if (kwargs.contains("level")) + settings.set_level(kwargs["level"].cast()); + if (kwargs.contains("shuffle")) + settings.set_shuffle(kwargs["shuffle"].cast()); + return settings; + })) + .def("__repr__", + [](const PyZarrCompressionSettings& self) { return self.repr(); }) .def_property("compressor", &PyZarrCompressionSettings::compressor, &PyZarrCompressionSettings::set_compressor) @@ -355,12 +493,28 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrCompressionSettings::shuffle, &PyZarrCompressionSettings::set_shuffle); - py::class_(m, "ZarrDimensionProperties") - .def(py::init<>()) + py::class_(m, "Dimension", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrDimensionProperties props; + if (kwargs.contains("name")) + props.set_name(kwargs["name"].cast()); + if (kwargs.contains("kind")) + props.set_type(kwargs["kind"].cast()); + if (kwargs.contains("array_size_px")) + props.set_array_size_px(kwargs["array_size_px"].cast()); + if (kwargs.contains("chunk_size_px")) + props.set_chunk_size_px(kwargs["chunk_size_px"].cast()); + if (kwargs.contains("shard_size_chunks")) + props.set_shard_size_chunks( + kwargs["shard_size_chunks"].cast()); + return props; + })) + .def("__repr__", + [](const PyZarrDimensionProperties& self) { return self.repr(); }) .def_property("name", &PyZarrDimensionProperties::name, &PyZarrDimensionProperties::set_name) - .def_property("type", + .def_property("kind", &PyZarrDimensionProperties::type, &PyZarrDimensionProperties::set_type) .def_property("array_size_px", @@ -373,8 +527,70 @@ PYBIND11_MODULE(acquire_zarr, m) &PyZarrDimensionProperties::shard_size_chunks, &PyZarrDimensionProperties::set_shard_size_chunks); - py::class_(m, "ZarrStreamSettings") - .def(py::init<>()) + py::class_(m, "StreamSettings", py::dynamic_attr()) + .def(py::init([](py::kwargs kwargs) { + PyZarrStreamSettings settings; + + if (kwargs.contains("store_path")) + settings.set_store_path(kwargs["store_path"].cast()); + + if (kwargs.contains("custom_metadata")) + settings.set_custom_metadata( + kwargs["custom_metadata"].cast>()); + + if (kwargs.contains("s3")) + settings.set_s3( + kwargs["s3"].cast>()); + + if (kwargs.contains("compression")) + settings.set_compression( + kwargs["compression"] + .cast>()); + + if (kwargs.contains("dimensions")) + settings.dimensions = + kwargs["dimensions"] + .cast>(); + + if (kwargs.contains("multiscale")) + settings.set_multiscale(kwargs["multiscale"].cast()); + + if (kwargs.contains("data_type")) + settings.set_data_type(kwargs["data_type"].cast()); + + if (kwargs.contains("version")) + settings.set_version(kwargs["version"].cast()); + + return settings; + })) + .def("__repr__", + [](const PyZarrStreamSettings& self) { + std::string repr = + "StreamSettings(store_path='" + self.store_path(); + if (self.custom_metadata().has_value()) { + repr += + ", custom_metadata='" + self.custom_metadata().value(); + } + + if (self.s3().has_value()) { + repr += ", s3=" + self.s3()->repr(); + } + if (self.compression().has_value()) { + repr += ", compression=" + self.compression()->repr(); + } + repr += ", dimensions=["; + for (const auto& dim : self.dimensions) { + repr += dim.repr() + ", "; + } + repr += + "], multiscale=" + std::to_string(self.multiscale()) + + ", data_type=DataType." + + std::string(data_type_to_str(self.data_type())) + + ", version=ZarrVersion." + + std::string(self.version() == ZarrVersion_2 ? "V2" : "V3") + + ")"; + return repr; + }) .def_property("store_path", &PyZarrStreamSettings::store_path, &PyZarrStreamSettings::set_store_path) @@ -423,9 +639,7 @@ PYBIND11_MODULE(acquire_zarr, m) self.set_compression(obj.cast()); } }) - .def_property("dimensions", - &PyZarrStreamSettings::dimensions, - &PyZarrStreamSettings::set_dimensions) + .def_readwrite("dimensions", &PyZarrStreamSettings::dimensions) .def_property("multiscale", &PyZarrStreamSettings::multiscale, &PyZarrStreamSettings::set_multiscale) diff --git a/python/tests/test_settings.py b/python/tests/test_settings.py new file mode 100644 index 0000000..2ca7afd --- /dev/null +++ b/python/tests/test_settings.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 + +import json +from pathlib import Path + +import numpy as np +import pytest + +import acquire_zarr + + +@pytest.fixture(scope="function") +def settings(): + return acquire_zarr.StreamSettings() + + +def test_settings_set_store_path(settings): + assert settings.store_path == "" + + this_dir = str(Path(__file__).parent) + settings.store_path = this_dir + + assert settings.store_path == this_dir + + +def test_settings_set_custom_metadata(settings): + assert settings.custom_metadata is None + + metadata = json.dumps({"foo": "bar"}) + settings.custom_metadata = metadata + + assert settings.custom_metadata == metadata + + +def test_set_s3_settings(settings): + assert settings.s3 is None + + s3_settings = acquire_zarr.S3Settings( + endpoint="foo", + bucket_name="bar", + access_key_id="baz", + secret_access_key="qux", + ) + settings.s3 = s3_settings + + assert settings.s3 is not None + assert settings.s3.endpoint == "foo" + assert settings.s3.bucket_name == "bar" + assert settings.s3.access_key_id == "baz" + assert settings.s3.secret_access_key == "qux" + + +def test_set_compression_settings(settings): + assert settings.compression is None + + compression_settings = acquire_zarr.CompressionSettings( + compressor=acquire_zarr.Compressor.BLOSC1, + codec=acquire_zarr.CompressionCodec.BLOSC_ZSTD, + level=5, + shuffle=2, + ) + + settings.compression = compression_settings + assert settings.compression is not None + assert settings.compression.compressor == acquire_zarr.Compressor.BLOSC1 + assert settings.compression.codec == acquire_zarr.CompressionCodec.BLOSC_ZSTD + assert settings.compression.level == 5 + assert settings.compression.shuffle == 2 + + +def test_set_dimensions(settings): + assert len(settings.dimensions) == 0 + + settings.dimensions.append(acquire_zarr.Dimension( + name="foo", + kind=acquire_zarr.DimensionType.TIME, + array_size_px=1, + chunk_size_px=2, + shard_size_chunks=3, + )) + assert len(settings.dimensions) == 1 + assert settings.dimensions[0].name == "foo" + assert settings.dimensions[0].kind == acquire_zarr.DimensionType.TIME + assert settings.dimensions[0].array_size_px == 1 + assert settings.dimensions[0].chunk_size_px == 2 + assert settings.dimensions[0].shard_size_chunks == 3 + + settings.dimensions.append(acquire_zarr.Dimension( + name="bar", + kind=acquire_zarr.DimensionType.SPACE, + array_size_px=4, + chunk_size_px=5, + shard_size_chunks=6, + )) + assert len(settings.dimensions) == 2 + assert settings.dimensions[1].name == "bar" + assert settings.dimensions[1].kind == acquire_zarr.DimensionType.SPACE + assert settings.dimensions[1].array_size_px == 4 + assert settings.dimensions[1].chunk_size_px == 5 + assert settings.dimensions[1].shard_size_chunks == 6 + + settings.dimensions.append(acquire_zarr.Dimension( + name="baz", + kind=acquire_zarr.DimensionType.OTHER, + array_size_px=7, + chunk_size_px=8, + shard_size_chunks=9, + )) + assert len(settings.dimensions) == 3 + assert settings.dimensions[2].name == "baz" + assert settings.dimensions[2].kind == acquire_zarr.DimensionType.OTHER + assert settings.dimensions[2].array_size_px == 7 + assert settings.dimensions[2].chunk_size_px == 8 + assert settings.dimensions[2].shard_size_chunks == 9 + + +def test_set_multiscale(settings): + assert settings.multiscale is False + + settings.multiscale = True + + assert settings.multiscale is True + + +def test_set_version(settings): + assert settings.version == acquire_zarr.ZarrVersion.V2 + + settings.version = acquire_zarr.ZarrVersion.V3 + + assert settings.version == acquire_zarr.ZarrVersion.V3