From 17421101f5d6213fa17fc0fabfbaedfcc070bd70 Mon Sep 17 00:00:00 2001 From: mavaylon1 Date: Wed, 11 Jan 2023 09:34:41 -0800 Subject: [PATCH 01/27] Create project_action.yml (#68) --- .github/workflows/project_action.yml | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/project_action.yml diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml new file mode 100644 index 00000000..ad2a9c73 --- /dev/null +++ b/.github/workflows/project_action.yml @@ -0,0 +1,34 @@ +name: Add issues to Development Project Board + +on: + issues: + types: + - opened + +jobs: + add-to-project: + name: Add issue to project + runs-on: ubuntu-latest + steps: + - name: GitHub App token + id: generate_token + uses: tibdex/github-app-token@v1.7.0 + with: + app_id: ${{ secrets.APP_ID }} + private_key: ${{ secrets.APP_PEM }} + + - name: Add to Developer Board + env: + TOKEN: ${{ steps.generate_token.outputs.token }} + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/hdmf-dev/projects/7 + github-token: ${{ env.TOKEN }} + + - name: Add to Community Board + env: + TOKEN: ${{ steps.generate_token.outputs.token }} + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/hdmf-dev/projects/8 + github-token: ${{ env.TOKEN }} From 42134e225f418b8671d91b2d7f087bd2fb8be405 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 11 Jan 2023 10:55:02 -0800 Subject: [PATCH 02/27] Remove status warning on ZarrIO.__init__ (#67) * Remove warning from ZarrIO backend * Remove unnecessary warnings filters from the tutorials * Add status text to the docs --- README.rst | 6 +++--- docs/gallery/plot_nwb_zarrio.py | 2 -- docs/gallery/plot_zarr_dataset_io.py | 3 --- docs/gallery/plot_zarr_io.py | 3 --- docs/source/index.rst | 3 +++ docs/source/overview.rst | 2 ++ src/hdmf_zarr/backend.py | 3 --- 7 files changed, 8 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index bc465924..5af642ff 100644 --- a/README.rst +++ b/README.rst @@ -1,12 +1,12 @@ .. image:: docs/source/figures/logo_hdmf_zarr.png :width: 400 - + hdmf-zarr ========= The ``hdmf-zarr`` library implements a Zarr backend for HDMF as well as convenience classes for integration of Zarr with PyNWB to support writing of NWB files to Zarr. -The Zarr backend is currently experimental and may still change. See the `overiew page `_ for an overview of the available features and known limitations of hdmf-zarr. +**Status:** The Zarr backend is **under development** and may still change. See the `overiew page `_ for an overview of the available features and known limitations of hdmf-zarr. Latest Release @@ -36,7 +36,7 @@ CI / Health Status .. image:: https://github.com/hdmf-dev/hdmf-zarr/workflows/Deploy%20release/badge.svg :target: https://github.com/hdmf-dev/hdmf-zarr/actions/workflows/deploy_release.yml - + .. 
image:: https://github.com/hdmf-dev/hdmf-zarr/workflows/black/badge.svg :target: https://github.com/hdmf-dev/hdmf-zarr/actions/workflows/black.yml diff --git a/docs/gallery/plot_nwb_zarrio.py b/docs/gallery/plot_nwb_zarrio.py index 8065b262..95eed3db 100644 --- a/docs/gallery/plot_nwb_zarrio.py +++ b/docs/gallery/plot_nwb_zarrio.py @@ -23,8 +23,6 @@ """ # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_nwbzarrio.png' # Ignore warnings about the development of the ZarrIO backend -import warnings -warnings.filterwarnings('ignore', '.*The ZarrIO backend is experimental*', ) from datetime import datetime from dateutil.tz import tzlocal diff --git a/docs/gallery/plot_zarr_dataset_io.py b/docs/gallery/plot_zarr_dataset_io.py index 753ac2f5..c4307291 100644 --- a/docs/gallery/plot_zarr_dataset_io.py +++ b/docs/gallery/plot_zarr_dataset_io.py @@ -14,9 +14,6 @@ to store some arbitrary data columns. """ # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_zarr_dataset_io.png' -# Ignore warnings about the development of the ZarrIO backend -import warnings -warnings.filterwarnings('ignore', '.*The ZarrIO backend is experimental*', ) # Import DynamicTable and get the ROOT_NAME from hdmf.common.table import DynamicTable, VectorData diff --git a/docs/gallery/plot_zarr_io.py b/docs/gallery/plot_zarr_io.py index 0d4965fe..64e65a47 100644 --- a/docs/gallery/plot_zarr_io.py +++ b/docs/gallery/plot_zarr_io.py @@ -24,9 +24,6 @@ of a file does not appear in the path to locate it. """ # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_zarr_io.png' -# Ignore warnings about the development of the ZarrIO backend -import warnings -warnings.filterwarnings('ignore', '.*The ZarrIO backend is experimental*', ) # Import DynamicTable and get the ROOT_NAME from hdmf.common.table import DynamicTable diff --git a/docs/source/index.rst b/docs/source/index.rst index f08738db..ef2632a1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,6 +10,9 @@ Welcome to hdmf-zarr's documentation! convenience classes for integration of Zarr with `PyNWB `_ to support writing of NWB files to `Zarr `_. +**Status:** The Zarr backend is **under development** and may still change. See the +:ref:`sec-overview` section for a description of available features and known limitations of hdmf-zarr. + Citing hdmf-zarr ^^^^^^^^^^^^^^^^ diff --git a/docs/source/overview.rst b/docs/source/overview.rst index d8ca65d7..74f874f5 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -1,3 +1,5 @@ +.. _sec-overview: + Overview ======== diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 35c8d268..43444c80 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -87,9 +87,6 @@ def __init__(self, **kwargs): # Codec class to be used. Alternates, e.g., =numcodecs.JSON self.__codec_cls = numcodecs.pickles.Pickle if object_codec_class is None else object_codec_class super().__init__(manager, source=path) - warn_msg = ("The ZarrIO backend is experimental. It is under active development. 
" - "The ZarrIO backend may change any time and backward compatibility is not guaranteed.") - warnings.warn(warn_msg) @property def path(self): From f59da74c26aaeeb991961265fbb8a50609214249 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 18 Jan 2023 00:01:33 -0800 Subject: [PATCH 03/27] [Feature] Add support for using select user-defined zarr stores (#62) * Add support for using select user-defined zarr stores * Update resolution of references to work also for file-based Zarr stores * Update test_io_zarr.py to allow file-based Zarr stores * Updated changelog * Add ZarrIO.file property ease implementation of tests * Refactored ZarrIO tests for consistency and to run all backends via dedicated test classes * Update NWBZarrIO to support the new path options from ZarrIO * Update test_io_convert.py to test with all supported zarr.storage backends * Added docs on how to integrate new backends stores with ZarrIO * Update storage docs to add missing reserved links and groups * Add DEFAULT_SPEC_LOC_DIR and SUPPORTED_ZARR_STORES module variable of backend.py * Add Mixin and test cases to test conversion between Zarr and Zarr * Update ZarrIO tutorial to describe using custom data stores * Increase HDMF version to 3.5 * Removed filepath param from get_builder_exists_on_disk * Consistently close file in test when explicitly opened Co-authored-by: Ryan Ly --- CHANGELOG.md | 26 +- docs/gallery/plot_zarr_io.py | 46 ++ docs/source/index.rst | 1 + docs/source/integrating_data_stores.rst | 143 ++++ docs/source/storage.rst | 42 +- requirements-min.txt | 2 +- requirements.txt | 2 +- setup.py | 2 +- src/hdmf_zarr/backend.py | 130 ++-- src/hdmf_zarr/nwb.py | 16 +- .../{test_io_zarr.py => base_tests_zarrio.py} | 697 +++++++++++------- tests/unit/test_io_convert.py | 348 +++++++-- tests/unit/test_zarrio.py | 124 ++++ 13 files changed, 1153 insertions(+), 426 deletions(-) create mode 100644 docs/source/integrating_data_stores.rst rename tests/unit/{test_io_zarr.py => base_tests_zarrio.py} (69%) create mode 100644 tests/unit/test_zarrio.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 024af124..28c84972 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,28 @@ # HDMF-ZARR Changelog -## 0.2.0 (Latest) +## 0.3.0 (Upcoming) + +### New Features +* Added support, tests, and docs for using ``DirectoryStore``, ``TempStore``, and + ``NestedDirectoryStore`` Zarr storage backends with ``ZarrIO`` and ``NWBZarrIO`` + @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) + +### Minor enhancements +* Updated handling of references on read to simplify future integration of file-based Zarr + stores (e.g., ZipStore or database stores) @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) + +### Test suite enhancements +* Modularized unit tests to simplify running tests for multiple Zarr storage backends + @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) + +### Docs +* Added developer documentation on how to integrate new storage backends with ZarrIO + [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) + +### API Changes +* Removed unused ``filepath`` argument from ``ZarrIO.get_builder_exists_on_disk`` [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) + +## 0.2.0 (January 6, 2023) ### Bugs * Updated the storage of links/references to use paths relative to the current Zarr file to avoid breaking @@ -22,7 +44,7 @@ * Removed dependency on ``dandi`` library for data download in the conversion tutorial by storing the NWB files as local resources @oruebel 
[#61](https://github.com/hdmf-dev/hdmf-zarr/pull/61) -## 0.1.0 +## 0.1.0 (August 23, 2022) ### New features diff --git a/docs/gallery/plot_zarr_io.py b/docs/gallery/plot_zarr_io.py index 64e65a47..dba21b62 100644 --- a/docs/gallery/plot_zarr_io.py +++ b/docs/gallery/plot_zarr_io.py @@ -87,6 +87,7 @@ # zarr_io.close() + ############################################################################### # Converting to/from HDF5 using ``export`` # ---------------------------------------- @@ -137,3 +138,48 @@ intable_from_zarr = zarr_read_io.read() intable_zarr_df = intable_from_zarr.to_dataframe() intable_zarr_df # display the table in the gallery output + + +############################################################################### +# Using custom Zarr storage backends +# ----------------------------------- +# +# :py:class:`~hdmf_zarr.backend.ZarrIO` supports a subset of data stores available +# for Zarr, e.g., :py:class`~zarr.storage.DirectoryStore`, :py:class`~zarr.storage.TempStore`, +# and :py:class`~zarr.storage.NestedDirectoryStore`. The supported stores are defined +# in :py:attr:`~hdmf_zarr.backend.SUPPORTED_ZARR_STORES`. A main limitation to supporting +# all possible Zarr stores in :py:class:`~hdmf_zarr.backend.ZarrIO` is due to the fact that +# Zarr does not support links and references. +# +# .. note: +# +# See :ref:`sec-integrating-zarr-data-store` for details on how to integrate +# new stores with :py:class:`~hdmf_zarr.backend.ZarrIO`. +# +# To use a store other than the default, we simply need to instantiate the store +# and set pass it to :py:class:`~hdmf_zarr.backend.ZarrIO` via the ``path`` parameter. +# Here we use a :py:class`~zarr.storage.NestedDirectoryStore` to write a simple +# :py:class:`hdmf.common.CSRMatrix` container to disk. +# + +from zarr.storage import NestedDirectoryStore +from hdmf.common import CSRMatrix + +zarr_nsd_dir = "example_nested_store.zarr" +store = NestedDirectoryStore(zarr_dir) +csr_container = CSRMatrix( + name=ROOT_NAME, + data=[1, 2, 3, 4, 5, 6], + indices=[0, 2, 2, 0, 1, 2], + indptr=[0, 2, 3, 6], + shape=(3, 3)) + +# Write the csr_container to Zarr using a NestedDirectoryStore +with ZarrIO(path=zarr_nsd_dir, manager=get_manager(), mode='w') as zarr_io: + zarr_io.write(csr_container) + +# Read the CSR matrix to confirm the data was written correctly +with ZarrIO(path=zarr_nsd_dir, manager=get_manager(), mode='r') as zarr_io: + csr_read = zarr_io.read() + print(" data=%s\n indices=%s\n indptr=%s\n shape=%s" % + (str(csr_read.data), str(csr_read.indices), str(csr_read.indptr), str(csr_read.shape))) diff --git a/docs/source/index.rst b/docs/source/index.rst index ef2632a1..9656c3f7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -34,6 +34,7 @@ Citing hdmf-zarr :caption: For Developers: storage + integrating_data_stores hdmf_zarr Indices and tables diff --git a/docs/source/integrating_data_stores.rst b/docs/source/integrating_data_stores.rst new file mode 100644 index 00000000..86c886bc --- /dev/null +++ b/docs/source/integrating_data_stores.rst @@ -0,0 +1,143 @@ +.. _sec-integrating-zarr-data-stores: + +================================ +Integrating New Zarr Data Stores +================================ + +:py:class:`~hdmf_zarr.backend.ZarrIO` by default uses the Zarr +:zarr-docs:`DirectoryStore ` via +the :py:meth:`zarr.convenience.open` method. :py:class:`~hdmf_zarr.backend.ZarrIO` further +supports all stores listed in :py:class:`~hdmf_zarr.backend.SUPPORTED_ZARR_STORES`. 
+Users can specify a particular store using the ``path`` parameter when creating a new +:py:class:`~hdmf_zarr.backend.ZarrIO` instance. This document discusses key steps towards +integrating other data stores available for Zarr with :py:class:`~hdmf_zarr.backend.ZarrIO`. + + +Updating ZarrIO +=============== + +1. Import and add the new storage class to the :py:class:`~hdmf_zarr.backend.SUPPORTED_ZARR_STORES`. + This will in turn allow instances of your new storage class to be passed as a ``path`` parameter + to :py:meth:`~hdmf_zarr.backend.ZarrIO.__init__` + and :py:meth:`~hdmf_zarr.backend.ZarrIO.load_namespaces` and pass + :py:meth:`~hdmf.utils.docval` validation for these functions. + + * If your store has a ``.path`` property then the :py:attr:`~hdmf.backends.io.HDMFIO.source` property + will be set accordingly in ``__init__`` in :py:class:`~hdmf_zarr.backend.ZarrIO`, otherwise + ``__init__`` may need to be updated to set a correct ``source`` (used, e.g., to define links). + +2. Update :py:meth:`~hdmf_zarr.backend.ZarrIO.open` and :py:meth:`~hdmf_zarr.backend.ZarrIO.close` + as necessary. + +3. Depending on the type of data store, it may also be necessary to update the handling of links + and references in :py:class:`~hdmf_zarr.backend.ZarrIO`. In principle, reading and writing of + links should not need to change, however, in particular the + :py:meth:`~hdmf_zarr.backend.ZarrIO.__resolve_ref` and + :py:meth:`~hdmf_zarr.backend.ZarrIO.get_builder_exists_on_disk` + method may need to be updated to ensure + references are opened correctly on read for files stored with your new store. The + :py:meth:`~hdmf_zarr.backend.ZarrIO.__get_ref` function may also need to be updated, in + particular in case the links to your store also modify the storage schema for links + (e.g., if you need to store additional metadata in order to resolve links to your store). + +Updating NWBZarrIO +================== + +In most cases we should not need to update :py:class:`~hdmf_zarr.nwb.NWBZarrIO` as it inherits +directly from :py:class:`~hdmf_zarr.backend.ZarrIO`. However, in particular if the interface for +``__init__`` has changed for :py:class:`~hdmf_zarr.backend.ZarrIO`, +then we may also need to modify :py:class:`~hdmf_zarr.nwb.NWBZarrIO` accordingly. + +Updating Unit Tests +=================== + +Much of the core test harness of ``hdmf_zarr`` is modularized to simplify running existing +tests with new storage backends. In this way, we can quickly create a collection of common tests +for new backends, and new test cases added to the test suite can be run with all backends. +The relevant test class are located in the `/tests/unit `_ +directory of the hdmf_zarr repository. + +test_zarrio.py +-------------- +`base_tests_zarrio.py `_ +provides a collection of base classes that define common +test cases to test basic functionality of :py:class:`~hdmf_zarr.backend.ZarrIO`. Using these base classes, the +`test_zarrio.py `_ module +then implements concrete tests for various backends. To create tests for a new data store, we need to +add the following main classes (while ```` in the code below would need to be replaced with the +class name of the new data store): + +1. **Create tests for new data store:** Add the following main classes (while ```` in the code below would need to be replaces with the class name of the new data store): + + .. 
code-block:: python + + ######################################### + # tests + ######################################### + class TestZarrWriter(BaseTestZarrWriter): + """Test writing of builder with Zarr using a custom """ + def setUp(self): + super().setUp() + self.store = () + self.store_path = self.store.path + + + class TestZarrWriteUnit(BaseTestZarrWriteUnit): + """Unit test for individual write functions using a custom """ + def setUp(self): + super().setUp() + self.store = () + self.store_path = self.store.path + + + class TestExportZarrToZarr(BaseTestExportZarrToZarr): + """Test exporting Zarr to Zarr using .""" + def setUp(self): + super().setUp() + self.stores = [() for i in range(len(self.store_path))] + self.store_paths = [s.path for s in self.stores] + +.. note: + + In the case of ``BaseTestZarrWriter`` and ``BaseTestZarrWriteUnit`` the ``self.store`` variable defines + the data store to use with :py:class:`~hdmf_zarr.backend.ZarrIO` while running tests. + ``self.store_path`` is used during ``tearDown`` to clean up files as well as in some cases + to setup links in test ``Builders`` or if a test case requires opening a file with Zarr directly. + + ``BaseTestExportZarrToZarr`` tests exporting between Zarr data stores but requires 4 stores and + paths to be specified via the ``self.store`` and ``self.store_path`` variable. To test export + between your new backend, you can simply set up all 4 instances to the new store while using different + storage paths for the different instances (which are saved in ``self.store_paths``). + +2. **Update ``base_tests_zarrio.reopen_store``** If our new data store cannot be reused after + it has been closed via :py:meth:`~hdmf_zarr.backend.ZarrIO.close`, then update the method + to either reopen or create a new equivalent data store that can be used for read. + The function is used in tests that write data, then close the ZarrIO, and + create a new ZarrIO to read and validate the data. + +3. **Run and update tests** Depending on your data store, some test cases in ``BaseTestZarrWriter``, ``BaseTestZarrWriteUnit`` + or ``BaseTestExportZarrToZarr`` may need to be updated to correctly work with our data store. + Simply run the test suite to see if any cases are failing to see whether the ``setUp`` in your + test classes or any specific test cases may need to be updated. + +test_io_convert.py +------------------ +`test_io_convert.py `_ +uses a collection of mixin classes to define custom test classes to test export from one IO backend +to another. As such, the test cases here typically first write to one target and then export to +another target and then compare that the data between the two files is consistent. + +1. **Update ``MixinTestHDF5ToZarr``, ``MixinTestZarrToZarr``, and ``MixinTestZarrToZarr``** + mixin classes to add the new backend to the ``WRITE_PATHS`` (if Zarr is the initial write + target) and/or ``EXPORT_PATHS`` (if Zarr is the export target) variables to define our + store as a write or export store for :py:class:`~hdmf_zarr.backend.ZarrIO`, respectively. + Once we have added our new store as write/export targets to these mixins, all test cases + defined in the module will be run with our new backend. Specifically, we here commonly + need to add an instance of our new data store to: + + * ``MixinTestHDF5ToZarr.EXPORT_PATHS`` + * ``MixinTestZarrToHDF5.WRITE_PATHS`` + * ``MixinTestZarrToZarr.WRITE_PATHS`` and ``MixinTestZarrToZarr.EXPORT_PATHS`` + +2. 
**Update tests and ZarrIO as necessary** Run the test suite and fix any identified issues. + diff --git a/docs/source/storage.rst b/docs/source/storage.rst index a336c6de..b391dd12 100644 --- a/docs/source/storage.rst +++ b/docs/source/storage.rst @@ -1,11 +1,11 @@ .. _sec-zarr-storage: -======== -Storage -======== +===================== +Storage Specification +===================== -hdmf-zarr currently uses the Zarr :zarr-docs:`DirectoryStory `, -which uses directories and files on a standard file system to serialize data. Below we describe how +hdmf-zarr currently uses the Zarr :zarr-docs:`DirectoryStore `, +which uses directories and files on a standard file system to serialize data. Format Mapping ============== @@ -62,6 +62,14 @@ Groups object ID Attribute ``object_id`` on the Zarr Group ============================ ====================================================================================== +.. _sec-zarr-storage-groups-reserved: + +Reserved groups +---------------- + +The :py:class:`~hdmf_zarr.backend.ZarrIO` backend typically caches the schema used to create a file in the +group ``/specifications`` (see also :ref:`sec-zarr-caching-specifications`) + .. _sec-zarr-storage-datasets: Datasets @@ -127,8 +135,9 @@ Reserved attributes ------------------- The :py:class:`~hdmf_zarr.backend.ZarrIO` backend defines a set of reserved attribute names defined in -py:attr:`~hdmf_zarr.backend.ZarrIO.__reserve_attribute`. These reserved attributes are used to implement -functionality (e.g., links and object references) that are not natively supported by Zarr. +:py:attr:`~hdmf_zarr.backend.ZarrIO.__reserve_attribute`. These reserved attributes are used to implement +functionality (e.g., links and object references, which are not natively supported by Zarr) and may be +added on any Group or Dataset in the file. ============================ ====================================================================================== Reserved Attribute Name Usage @@ -139,6 +148,16 @@ functionality (e.g., links and object references) that are not natively supporte See :ref:`sec-zarr-storage-references` ============================ ====================================================================================== +In addition, the following reserved attributes are added to the root Group of the file only: + + ============================ ====================================================================================== + Reserved Attribute Name Usage + ============================ ====================================================================================== + .specloc Attribute storing the path to the Group where the scheme for the file are + cached. See :py:attr:`~hdmf_zarr.backend.SPEC_LOC_ATTR` + ============================ ====================================================================================== + + .. _sec-zarr-storage-links: Links @@ -337,6 +356,8 @@ The mappings of data types is as follows +--------------------------+------------------------------------+----------------+ +.. _sec-zarr-caching-specifications: + Caching format specifications ============================= @@ -345,8 +366,11 @@ directly in the Zarr file. Caching the specification in the file ensures that us the specification directly if necessary without requiring external resources. For the Zarr backend, caching of the schema is implemented as follows. -The Zarr backend adds the reserved top-level group ``/specifications`` in which all format specifications (including -extensions) are cached. 
The ``/specifications`` group contains for each specification namespace a subgroup +The :py:class:`~hdmf_zarr.backend.ZarrIO`` backend adds the reserved top-level group ``/specifications`` +in which all format specifications (including extensions) are cached. The default name for this group is +defined in :py:attr:`~hdmf_zarr.backend.DEFAULT_SPEC_LOC_DIR` and caching of +specifications is implemented in ``ZarrIO.__cache_spec``. +The ``/specifications`` group contains for each specification namespace a subgroup ``/specifications//`` in which the specification for a particular version of a namespace are stored (e.g., ``/specifications/core/2.0.1`` in the case of the NWB core namespace at version 2.0.1). The actual specification data is then stored as a JSON string in scalar datasets with a binary, variable-length string diff --git a/requirements-min.txt b/requirements-min.txt index 79ba5231..bf4d276f 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,4 +1,4 @@ -hdmf==3.4.0 +hdmf==3.5.0 zarr==2.11.0 numcodecs==0.9.1 pynwb==2.0.0 diff --git a/requirements.txt b/requirements.txt index 7dc5c58c..69d3947b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # pinned dependencies to reproduce an entire development environment to use HDMF-ZARR -hdmf==3.4.0 +hdmf==3.5.0 zarr==2.11.0 numcodecs==0.9.1 pynwb==2.0.1 \ No newline at end of file diff --git a/setup.py b/setup.py index dd9f4800..50953471 100755 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ reqs = [ - 'hdmf>=3.4.0', + 'hdmf>=3.5.0', 'zarr>=2.11.0', 'numcodecs>=0.9.1', 'pynwb>=2.0.0', diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 43444c80..659ea68e 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -12,6 +12,9 @@ import zarr from zarr.hierarchy import Group from zarr.core import Array +from zarr.storage import (DirectoryStore, + TempStore, + NestedDirectoryStore) import numcodecs # HDMF-ZARR imports @@ -48,12 +51,33 @@ # Module variables ROOT_NAME = 'root' +""" +Name of the root builder for read/write +""" + SPEC_LOC_ATTR = '.specloc' +""" +Reserved attribute storing the path to the Group where the schema for the file are cached +""" + +DEFAULT_SPEC_LOC_DIR = 'specifications' +""" +Default name of the group where specifications should be cached +""" + +SUPPORTED_ZARR_STORES = (DirectoryStore, + TempStore, + NestedDirectoryStore) +""" +Tuple listing all Zarr storage backends supported by ZarrIO +""" class ZarrIO(HDMFIO): - @docval({'name': 'path', 'type': str, 'doc': 'the path to the Zarr file'}, + @docval({'name': 'path', + 'type': (str, *SUPPORTED_ZARR_STORES), + 'doc': 'the path to the Zarr file or a supported Zarr store'}, {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {'name': 'mode', 'type': str, 'doc': 'the mode to open the Zarr file with, one of ("w", "r", "r+", "a", "w-")'}, @@ -86,7 +110,22 @@ def __init__(self, **kwargs): self.__dci_queue = ZarrIODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted # Codec class to be used. Alternates, e.g., =numcodecs.JSON self.__codec_cls = numcodecs.pickles.Pickle if object_codec_class is None else object_codec_class - super().__init__(manager, source=path) + source_path = self.__path + if isinstance(self.__path, SUPPORTED_ZARR_STORES): + source_path = self.__path.path + super().__init__(manager, source=source_path) + warn_msg = ("The ZarrIO backend is experimental. It is under active development. 
" + "The ZarrIO backend may change any time and backward compatibility is not guaranteed.") + warnings.warn(warn_msg) + + @property + def file(self): + """ + The Zarr zarr.hierarchy.Group (or zarr.core.Array) opened by the backend. + May be None in case open has not been called yet, e.g., if no data has been + read or written yet via this instance. + """ + return self.__file @property def path(self): @@ -96,7 +135,7 @@ def path(self): @property def abspath(self): """The absolute path to the Zarr file""" - return os.path.abspath(self.path) + return os.path.abspath(self.source) @property def synchronizer(self): @@ -109,7 +148,7 @@ def object_codec_class(self): def open(self): """Open the Zarr file""" if self.__file is None: - self.__file = zarr.open(store=self.__path, + self.__file = zarr.open(store=self.path, mode=self.__mode, synchronizer=self.__synchronizer) @@ -122,7 +161,9 @@ def close(self): @docval({'name': 'namespace_catalog', 'type': (NamespaceCatalog, TypeMap), 'doc': 'the NamespaceCatalog or TypeMap to load namespaces into'}, - {'name': 'path', 'type': str, 'doc': 'the path to the Zarr file'}, + {'name': 'path', + 'type': (str, *SUPPORTED_ZARR_STORES), + 'doc': 'the path to the Zarr file or a supported Zarr store'}, {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None}) def load_namespaces(cls, namespace_catalog, path, namespaces=None): ''' @@ -165,7 +206,7 @@ def __cache_spec(self): if ref is not None: spec_group = self.__file[ref] else: - path = 'specifications' # do something to figure out where the specifications should go + path = DEFAULT_SPEC_LOC_DIR # do something to figure out where the specifications should go spec_group = self.__file.require_group(path) self.__file.attrs[SPEC_LOC_ATTR] = path ns_catalog = self.manager.namespace_catalog @@ -219,15 +260,16 @@ def get_written(self, builder, check_on_disk=False): """ written = self._written_builders.get_written(builder) if written and check_on_disk: - written = written and self.get_builder_exists_on_disk(builder=builder, filepath=self.__path) + written = written and self.get_builder_exists_on_disk(builder=builder) return written - @docval({'name': 'builder', 'type': Builder, 'doc': 'The builder of interest'}, - {'name': 'filepath', 'type': str, - 'doc': 'The path to the Zarr file or None for this file', 'default': None}) + @docval({'name': 'builder', 'type': Builder, 'doc': 'The builder of interest'}) def get_builder_exists_on_disk(self, **kwargs): - """Convenience function to check whether a given builder exists on disk""" - builder_path = self.get_builder_disk_path(**kwargs) + """ + Convenience function to check whether a given builder exists on disk in this Zarr file. 
+ """ + builder = getargs('builder', kwargs) + builder_path = self.get_builder_disk_path(builder=builder, filepath=None) exists_on_disk = os.path.exists(builder_path) return exists_on_disk @@ -236,7 +278,7 @@ def get_builder_exists_on_disk(self, **kwargs): 'doc': 'The path to the Zarr file or None for this file', 'default': None}) def get_builder_disk_path(self, **kwargs): builder, filepath = getargs('builder', 'filepath', kwargs) - basepath = filepath if filepath is not None else self.__path + basepath = filepath if filepath is not None else self.source builder_path = os.path.join(basepath, self.__get_path(builder).lstrip("/")) return builder_path @@ -458,19 +500,35 @@ def __resolve_ref(self, zarr_ref): The function only constructs the links to the targe object, but it does not check if the object exists :param zarr_ref: Dict with `source` and `path` keys or a `ZarrRefernce` object - :return: Full path to the linked object + :return: 1) name of the target object + 2) the target zarr object within the target file """ # Extract the path as defined in the zarr_ref object if zarr_ref.get('source', None) is None: - ref_path = str(zarr_ref['path']) - elif zarr_ref.get('path', None) is None: - ref_path = str(zarr_ref['source']) + source_file = str(zarr_ref['path']) + else: + source_file = str(zarr_ref['source']) + # Resolve the path relative to the current file + source_file = os.path.abspath(os.path.join(self.source, source_file)) + object_path = zarr_ref.get('path', None) + # full_path = None + # if os.path.isdir(source_file): + # if object_path is not None: + # full_path = os.path.join(source_file, object_path.lstrip('/')) + # else: + # full_path = source_file + if object_path: + target_name = os.path.basename(object_path) else: - ref_path = os.path.join(zarr_ref['source'], zarr_ref['path'].lstrip("/")) - # Make the path relative to the current file - ref_path = os.path.abspath(os.path.join(self.path, ref_path)) + target_name = ROOT_NAME + target_zarr_obj = zarr.open(source_file, mode='r') + if object_path is not None: + try: + target_zarr_obj = target_zarr_obj[object_path] + except Exception: + raise ValueError("Found bad link to object %s in file %s" % (object_path, source_file)) # Return the create path - return ref_path + return target_name, target_zarr_obj def __get_ref(self, ref_object): """ @@ -502,7 +560,7 @@ def __get_ref(self, ref_object): # between backends a user should always use export which takes care of creating a clean set of builders. 
source = (builder.source if (builder.source is not None and os.path.isdir(builder.source)) - else self.__path) + else self.source) # Make the source relative to the current file source = os.path.relpath(os.path.abspath(source), start=self.abspath) # Return the ZarrReference object @@ -995,7 +1053,7 @@ def __read_group(self, zarr_obj, name=None): # Create the GroupBuilder attributes = self.__read_attrs(zarr_obj) - ret = GroupBuilder(name=name, source=self.__path, attributes=attributes) + ret = GroupBuilder(name=name, source=self.source, attributes=attributes) ret.location = self.get_zarr_parent_path(zarr_obj) # read sub groups @@ -1028,18 +1086,13 @@ def __read_links(self, zarr_obj, parent): links = zarr_obj.attrs['zarr_link'] for link in links: link_name = link['name'] - l_path = self.__resolve_ref(link) - if not os.path.exists(l_path): - raise ValueError("Found bad link %s in %s in file %s to %s" % - (link_name, self.__get_path(parent), self.__path, l_path)) - target_name = str(os.path.basename(l_path)) - target_zarr_obj = zarr.open(l_path, mode='r') + target_name, target_zarr_obj = self.__resolve_ref(link) # NOTE: __read_group and __read_dataset return the cached builders if the target has already been built if isinstance(target_zarr_obj, Group): builder = self.__read_group(target_zarr_obj, target_name) else: builder = self.__read_dataset(target_zarr_obj, target_name) - link_builder = LinkBuilder(builder=builder, name=link_name, source=self.__path) + link_builder = LinkBuilder(builder=builder, name=link_name, source=self.source) link_builder.location = os.path.join(parent.location, parent.name) self._written_builders.set_written(link_builder) # record that the builder has been written parent.set_link(link_builder) @@ -1056,7 +1109,7 @@ def __read_dataset(self, zarr_obj, name): "dtype": zarr_obj.attrs['zarr_dtype'], "maxshape": zarr_obj.shape, "chunks": not (zarr_obj.shape == zarr_obj.chunks), - "source": self.__path} + "source": self.source} dtype = kwargs['dtype'] # By default, use the zarr.core.Array as data for lazy data load @@ -1133,13 +1186,7 @@ def __parse_ref(self, shape, obj_refs, reg_refs, data): o = data for i in p: o = o[i] - path = self.__resolve_ref(o) - if not os.path.exists(path): - raise ValueError("Found bad link in dataset to %s" % (path)) - - target_name = os.path.basename(path) - target_zarr_obj = zarr.open(path, mode='r') - + target_name, target_zarr_obj = self.__resolve_ref(o) o = data for i in range(0, len(p)-1): o = data[p[i]] @@ -1156,12 +1203,7 @@ def __read_attrs(self, zarr_obj): if isinstance(v, dict) and 'zarr_dtype' in v: # TODO Is this the correct way to resolve references? 
if v['zarr_dtype'] == 'object': - path = self.__resolve_ref(v['value']) - if not os.path.exists(path): - raise ValueError("Found bad link in attribute to %s" % (path)) - - target_name = str(os.path.basename(path)) - target_zarr_obj = zarr.open(str(path), mode='r') + target_name, target_zarr_obj = self.__resolve_ref(v['value']) if isinstance(target_zarr_obj, zarr.hierarchy.Group): ret[k] = self.__read_group(target_zarr_obj, target_name) else: diff --git a/src/hdmf_zarr/nwb.py b/src/hdmf_zarr/nwb.py index 224c44d8..8a41719b 100644 --- a/src/hdmf_zarr/nwb.py +++ b/src/hdmf_zarr/nwb.py @@ -1,10 +1,10 @@ """Module with Zarr backend for NWB for integration with PyNWB""" from warnings import warn from .backend import ZarrIO -import zarr from hdmf.utils import (docval, - popargs) + popargs, + get_docval) from hdmf.backends.io import HDMFIO from hdmf.build import (BuildManager, TypeMap) @@ -15,24 +15,16 @@ class NWBZarrIO(ZarrIO): """ IO backend for PyNWB for writing NWB files - This class is similar to the NWBHDF5IO class in PyNWB. The main purpose of this class + This class is similar to the :py:class:`~pynwb.NWBHDF5IO` class in PyNWB. The main purpose of this class is to perform default setup for BuildManager, loading or namespaces etc., in the context of the NWB format. """ - - @docval({'name': 'path', 'type': str, 'doc': 'the path to the Zarr file'}, - {'name': 'mode', 'type': str, - 'doc': 'the mode to open the Zarr file with, one of ("w", "r", "r+", "a", "w-")'}, + @docval(*get_docval(ZarrIO.__init__), {'name': 'load_namespaces', 'type': bool, 'doc': 'whether or not to load cached namespaces from given path - not applicable in write mode', 'default': False}, - {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', - 'default': None}, {'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps', - 'default': None}, - {'name': 'synchronizer', 'type': (zarr.ProcessSynchronizer, zarr.ThreadSynchronizer, bool), - 'doc': 'Zarr synchronizer to use for parallel I/O. If set to True a ProcessSynchronizer is used.', 'default': None}) def __init__(self, **kwargs): path, mode, manager, extensions, load_namespaces, synchronizer = \ diff --git a/tests/unit/test_io_zarr.py b/tests/unit/base_tests_zarrio.py similarity index 69% rename from tests/unit/test_io_zarr.py rename to tests/unit/base_tests_zarrio.py index ac09b330..3e157413 100644 --- a/tests/unit/test_io_zarr.py +++ b/tests/unit/base_tests_zarrio.py @@ -1,8 +1,13 @@ -"""Test module to validate the ZarrIO is working""" +""" +Module defining the base unit test cases for ZarrIO. 
+ +The actual tests are then instantiated with various different backends in the +test_zarrio.py module.""" import unittest import os import numpy as np import shutil +import warnings # Try to import Zarr and disable tests if Zarr is not available import zarr @@ -34,28 +39,77 @@ CacheSpecTestHelper, get_temp_filepath) +from abc import ABCMeta, abstractmethod + -def total_directory_size(source): - """Helper function used to compute the size of a directory""" +def total_size(source): + """Helper function used to compute the size of a directory or file""" dsize = os.path.getsize(source) - for item in os.listdir(source): - itempath = os.path.join(source, item) - if os.path.isfile(itempath): - dsize += os.path.getsize(itempath) - elif os.path.isdir(itempath): - dsize += total_directory_size(itempath) + if os.path.isdir(source): + for item in os.listdir(source): + itempath = os.path.join(source, item) + if os.path.isfile(itempath): + dsize += os.path.getsize(itempath) + elif os.path.isdir(itempath): + dsize += total_size(itempath) return dsize -class TestZarrWriter(TestCase): - """Test writing of builder with Zarr""" +class BaseZarrWriterTestCase(TestCase, metaclass=ABCMeta): + """ + Base class for unit tests for ZarrIO with support to configure the data store used. + + Child classes must implement the ``setUp`` function of the test and define the following + main instance variables to define the data store to use for the tests defined here: + + :ivar store: The Zarr data store(s) to use. + :type store: Same as the `path`` parameter of :py:class:`~hdmf_zarr.backend.ZarrIO.__init__ ` + :ivar store_path: The path(s) to the Zarr file defined by the store + """ + + @abstractmethod def setUp(self): - self.manager = get_foo_buildmanager() - self.path = "test_io.zarr" + raise NotImplementedError def tearDown(self): - if os.path.exists(self.path): - shutil.rmtree(self.path) + """ + Remove all files and folders defined by self.store_path + """ + paths = self.store_path if isinstance(self.store_path, list) else [self.store_path, ] + for path in paths: + if os.path.exists(path): + if os.path.isdir(path): + shutil.rmtree(path) + elif os.path.isfile(path): + os.remove(path) + else: + warnings.warn("Could not remove: %s" % path) + + +class BaseTestZarrWriter(BaseZarrWriterTestCase): + """ + Test writing of builder with ZarrIO + + The following main instance variables need to be set by child classes to + customize the data store to use for the tests defined here: + + :ivar store: The Zarr data store to use. + :type store: Same as the `path`` parameter of :py:class:`~hdmf_zarr.backend.ZarrIO.__init__ ` + :ivar store_path: The path(s) to the Zarr file defined by the store + + The builder data for the tests is defined by: + + :ivar manager: The build manager to use for writing the builders + + and the functions ``createGroupBuilder``, ``createReferenceBuilder`` and + ``createReferenceCompoundBuilder``. Customizing the builder data is in + principle possible in child classes but has not been tested. 
+ """ + + def setUp(self): + self.manager = get_foo_buildmanager() + self.store = "test_io.zarr" + self.store_path = self.store def createGroupBuilder(self): self.foo_builder = GroupBuilder('foo1', @@ -67,7 +121,7 @@ def createGroupBuilder(self): # self.manager.prebuilt(self.foo, self.foo_builder) self.builder = GroupBuilder( 'root', - source=self.path, + source=self.store_path, groups={'test_bucket': GroupBuilder('test_bucket', groups={'foo_holder': @@ -75,7 +129,7 @@ def createGroupBuilder(self): groups={'foo1': self.foo_builder})})}, attributes={'data_type': 'FooFile'}) - def getReferenceBuilder(self): + def createReferenceBuilder(self): data_1 = np.arange(100, 200, 10).reshape(2, 5) data_2 = np.arange(0, 200, 10).reshape(4, 5) dataset_1 = DatasetBuilder('dataset_1', data_1) @@ -87,13 +141,13 @@ def getReferenceBuilder(self): dataset_ref = DatasetBuilder('ref_dataset', ref_data, dtype='object') builder = GroupBuilder('root', - source=self.path, + source=self.store_path, datasets={'dataset_1': dataset_1, 'dataset_2': dataset_2, 'ref_dataset': dataset_ref}) return builder - def getReferenceCompoundBuilder(self): + def createReferenceCompoundBuilder(self): data_1 = np.arange(100, 200, 10).reshape(2, 5) data_2 = np.arange(0, 200, 10).reshape(4, 5) dataset_1 = DatasetBuilder('dataset_1', data_1) @@ -110,25 +164,25 @@ def getReferenceCompoundBuilder(self): {'name': 'reference', 'dtype': 'object'}] dataset_ref = DatasetBuilder('ref_dataset', ref_data, dtype=ref_data_type) builder = GroupBuilder('root', - source=self.path, + source=self.store_path, datasets={'dataset_1': dataset_1, 'dataset_2': dataset_2, 'ref_dataset': dataset_ref}) return builder def read_test_dataset(self): - reader = ZarrIO(self.path, manager=self.manager, mode='r') + reader = ZarrIO(self.store, manager=self.manager, mode='r') self.root = reader.read_builder() dataset = self.root['test_bucket/foo_holder/foo1/my_data'] return dataset def read(self): - reader = ZarrIO(self.path, manager=self.manager, mode='r') + reader = ZarrIO(self.store, manager=self.manager, mode='r') self.root = reader.read_builder() def test_cache_spec(self): - self.io = ZarrIO(self.path, manager=self.manager, mode='w') + tempIO = ZarrIO(self.store, manager=self.manager, mode='w') # Setup all the data we need foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) @@ -137,14 +191,14 @@ def test_cache_spec(self): foofile = FooFile(buckets=[foobucket]) # Write the first file - self.io.write(foofile, cache_spec=True) - self.io.close() + tempIO.write(foofile, cache_spec=True) + tempIO.close() # Load the spec and assert that it is valid ns_catalog = NamespaceCatalog() - ZarrIO.load_namespaces(ns_catalog, self.path) + ZarrIO.load_namespaces(ns_catalog, self.store) self.assertEqual(ns_catalog.namespaces, ('test_core',)) - source_types = CacheSpecTestHelper.get_types(self.io.manager.namespace_catalog) + source_types = CacheSpecTestHelper.get_types(self.manager.namespace_catalog) read_types = CacheSpecTestHelper.get_types(ns_catalog) self.assertSetEqual(source_types, read_types) @@ -152,7 +206,7 @@ def test_write_int(self, test_data=None): data = np.arange(100, 200, 10).reshape(2, 5) if test_data is None else test_data self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) self.createGroupBuilder() - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() @@ -170,7 +224,7 @@ def test_write_compound(self, 
test_data=None): {'name': 'name', 'dtype': str}] self.__dataset_builder = DatasetBuilder('my_data', data, dtype=data_type) self.createGroupBuilder() - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() @@ -179,7 +233,7 @@ def test_write_chunk(self, test_data=None): data_io = ZarrDataIO(data=data, chunks=(1, 5), fillvalue=-1) self.__dataset_builder = DatasetBuilder('my_data', data_io, attributes={'attr2': 17}) self.createGroupBuilder() - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() @@ -188,7 +242,7 @@ def test_write_strings(self, test_data=None): ['b', 'bb', 'bbb', 'bbbb', 'bbbbb']] if test_data is None else test_data self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) self.createGroupBuilder() - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() @@ -199,7 +253,7 @@ def test_write_links(self, test_data=None): link_parent = self.builder['test_bucket'] link_parent.set_link(LinkBuilder(self.foo_builder, 'my_link')) link_parent.set_link(LinkBuilder(self.__dataset_builder, 'my_dataset')) - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() @@ -207,32 +261,34 @@ def test_write_link_array(self): data = np.arange(100, 200, 10).reshape(2, 5) self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) self.createGroupBuilder() - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) - zarr_array = zarr.open(self.path+"/test_bucket/foo_holder/foo1/my_data", mode='r') + zarr_file = zarr.open(self.store, mode='r') + zarr_array = zarr_file["/test_bucket/foo_holder/foo1/my_data"] link_io = ZarrDataIO(data=zarr_array, link_data=True) link_dataset = DatasetBuilder('dataset_link', link_io) self.builder['test_bucket'].set_dataset(link_dataset) writer.write_builder(self.builder) writer.close() - reader = ZarrIO(self.path, manager=self.manager, mode='r') + reader = ZarrIO(self.store, manager=self.manager, mode='r') self.root = reader.read_builder() read_link = self.root['test_bucket/dataset_link'] read_link_data = read_link['builder']['data'][:] self.assertTrue(np.all(data == read_link_data)) + reader.close() def test_write_reference(self): - builder = self.getReferenceBuilder() - writer = ZarrIO(self.path, + builder = self.createReferenceBuilder() + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(builder) writer.close() def test_write_reference_compound(self): - builder = self.getReferenceCompoundBuilder() - writer = ZarrIO(self.path, manager=self.manager, mode='a') + builder = self.createReferenceCompoundBuilder() + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(builder) writer.close() @@ -281,7 +337,7 @@ def test_read_link_buf(self): link_parent_2 = self.builder['test_bucket/foo_holder'] link_parent_1.set_link(LinkBuilder(self.__dataset_builder, 'my_dataset_1')) link_parent_2.set_link(LinkBuilder(self.__dataset_builder, 'my_dataset_2')) - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = 
ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() self.read() @@ -291,7 +347,7 @@ def test_read_link_buf(self): def test_read_reference(self): self.test_write_reference() self.read() - builder = self.getReferenceBuilder()['ref_dataset'] + builder = self.createReferenceBuilder()['ref_dataset'] read_builder = self.root['ref_dataset'] # Load the linked arrays and confirm we get the same data as we had in the original builder for i, v in enumerate(read_builder['data']): @@ -300,7 +356,7 @@ def test_read_reference(self): def test_read_reference_compound(self): self.test_write_reference_compound() self.read() - builder = self.getReferenceCompoundBuilder()['ref_dataset'] + builder = self.createReferenceCompoundBuilder()['ref_dataset'] read_builder = self.root['ref_dataset'] # Load the elements of each entry in the compound dataset and compar the index, string, and referenced array for i, v in enumerate(read_builder['data']): @@ -328,11 +384,11 @@ def test_read_reference_compound_buf(self): {'name': 'reference', 'dtype': 'object'}] dataset_ref = DatasetBuilder('ref_dataset', ref_data, dtype=ref_data_type) builder = GroupBuilder('root', - source=self.path, + source=self.store_path, datasets={'dataset_1': dataset_1, 'dataset_2': dataset_2, 'ref_dataset': dataset_ref}) - writer = ZarrIO(self.path, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(builder) writer.close() @@ -341,34 +397,35 @@ def test_read_reference_compound_buf(self): self.assertTrue(self.root["ref_dataset"].data[0][2] == self.root['ref_dataset'].data[2][2]) -class TestZarrWriteUnit(TestCase): +class BaseTestZarrWriteUnit(BaseZarrWriterTestCase): """ Unit test for individual write functions """ - def setUp(self): - self.path = "test_io.zarr" - self.io = ZarrIO(self.path, mode='w') - self.f = self.io._ZarrIO__file - def tearDown(self): - if os.path.exists(self.path): - shutil.rmtree(self.path) + def setUp(self): + self.store = "test_io.zarr" + self.store_path = self.store ############################################# # ZarrDataIO general ############################################# def test_set_object_codec(self): # Test that the default codec is the Pickle store - self.assertEqual(self.io.object_codec_class.__qualname__, 'Pickle') - temp_io = ZarrIO(self.path, mode='w', object_codec_class=JSON) - self.assertEqual(temp_io.object_codec_class.__qualname__, 'JSON') + tempIO = ZarrIO(self.store, mode='w') + self.assertEqual(tempIO.object_codec_class.__qualname__, 'Pickle') + del tempIO # also calls tempIO.close() + tempIO = ZarrIO(self.store, mode='w', object_codec_class=JSON) + self.assertEqual(tempIO.object_codec_class.__qualname__, 'JSON') + tempIO.close() def test_synchronizer_constructor_arg_bool(self): """Test that setting the synchronizer argument to True/False works in ZarrIO""" - self.assertIsNone(self.io.synchronizer) - self.io.close() - self.io = ZarrIO(self.path, mode='w', synchronizer=True) - self.assertTrue(isinstance(self.io.synchronizer, zarr.ProcessSynchronizer)) + tempIO = ZarrIO(self.store, mode='w', synchronizer=False) + self.assertIsNone(tempIO.synchronizer) + del tempIO # also calls tempIO.close() + tempIO = ZarrIO(self.store, mode='w', synchronizer=True) + self.assertTrue(isinstance(tempIO.synchronizer, zarr.ProcessSynchronizer)) + tempIO.close() def test_zarrdataio_enable_default_compressor(self): """Default compression simply means not specifying any compressor and using Zarr defaults""" @@ -403,21 
+460,29 @@ def test_zarrdataio_array_conversion_datachunkiterator(self): def test_get_builder_exists_on_disk(self): """Test that get_builder_exists_on_disk finds the existing builder""" dset_builder = DatasetBuilder('test_dataset', 10, attributes={}) - self.assertFalse(self.io.get_builder_exists_on_disk(dset_builder)) # Make sure False is returned before write - self.io.write_dataset(self.f, dset_builder) - self.assertTrue(self.io.get_builder_exists_on_disk(dset_builder)) # Make sure True is returned after write + tempIO = ZarrIO(self.store, mode='w') + self.assertFalse(tempIO.get_builder_exists_on_disk(builder=dset_builder)) # Make sure is False is before write + tempIO .write_dataset(tempIO.file, dset_builder) + self.assertTrue(tempIO.get_builder_exists_on_disk(builder=dset_builder)) # Make sure is True after write + tempIO.close() def test_get_written(self): """Test that get_builder_exists_on_disk finds the existing builder""" + tempIO = ZarrIO(self.store, mode='w') dset_builder = DatasetBuilder('test_dataset', 10, attributes={}) - self.assertFalse(self.io.get_written(dset_builder)) # Make sure False is returned before write - self.io.write_dataset(self.f, dset_builder) - self.assertTrue(self.io.get_written(dset_builder)) # Make sure True is returned after write - self.assertTrue(self.io.get_written(dset_builder, check_on_disk=True)) # Make sure its also on disk - # Now delete it from disk and check again - shutil.rmtree(self.io.get_builder_disk_path(dset_builder)) - self.assertTrue(self.io.get_written(dset_builder)) # The written flag should still be true - self.assertFalse(self.io.get_written(dset_builder, check_on_disk=True)) # But with check on disk should fail + self.assertFalse(tempIO.get_written(dset_builder)) # Make sure False is returned before write + tempIO.write_dataset(tempIO.file, dset_builder) + self.assertTrue(tempIO.get_written(dset_builder)) # Make sure True is returned after write + self.assertTrue(tempIO.get_written(dset_builder, check_on_disk=True)) # Make sure its also on disk + # Now delete it from disk and check again. 
+ builder_path = tempIO.get_builder_disk_path(dset_builder) + if os.path.isdir(builder_path): # Skip this check for file-based stores were we can easily delete objects + shutil.rmtree(builder_path) + # The written flag should still be true + self.assertTrue(tempIO.get_written(dset_builder)) + # But with check on disk should fail + self.assertFalse(tempIO.get_written(dset_builder, check_on_disk=True)) + tempIO.close() ########################################## # write_attributes @@ -425,15 +490,18 @@ def test_get_written(self): def __write_attribute_test_helper(self, name, value, assert_value=True): """ Helper function to write a single attribute and check its value for correctness + :param name: Name of the attribute :param value: Value of the attribute :param assert_value: Boolean indicating whether we should check correctness of the returned value :returns: the value read from disk so we can do our own tests if needed """ # write the attribute - testgroup = self.io._ZarrIO__file # For testing we just use our file and create some attributes + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + testgroup = tempIO.file # For testing we just use our file and create some attributes attr = {name: value} - self.io.write_attributes(testgroup, attr) + tempIO.write_attributes(testgroup, attr) # read the attribute read_val = testgroup.attrs[name] # assert that the read value matches the expected value @@ -444,6 +512,7 @@ def __write_attribute_test_helper(self, name, value, assert_value=True): self.assertListEqual(list(read_val), value.tolist()) else: self.assertEqual(testgroup.attrs[name], value) + tempIO.close() return read_val def test_write_attributes_write_scalar_int(self): @@ -502,72 +571,94 @@ def test_write_attributes_write_3Darray_of_floats(self): def test_write_attributes_write_reference_to_datasetbuilder(self): data_1 = np.arange(100, 200, 10).reshape(2, 5) dataset_1 = DatasetBuilder('dataset_1', data_1) - testgroup = self.io._ZarrIO__file # For testing we just use our file and create some attributes + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() attr = {'attr1': dataset_1} - self.io.write_attributes(testgroup, attr) + tempIO.write_attributes(obj=tempIO.file, attributes=attr) expected_value = {'attr1': {'zarr_dtype': 'object', 'value': {'source': ".", 'path': '/dataset_1'}}} - self.assertDictEqual(testgroup.attrs.asdict(), expected_value) + self.assertDictEqual(tempIO.file.attrs.asdict(), expected_value) + tempIO.close() def test_write_attributes_write_reference_to_referencebuilder(self): data_1 = np.arange(100, 200, 10).reshape(2, 5) dataset_1 = DatasetBuilder('dataset_1', data_1) ref1 = ReferenceBuilder(dataset_1) - testgroup = self.io._ZarrIO__file # For testing we just use our file and create some attributes + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() attr = {'attr1': ref1} - self.io.write_attributes(testgroup, attr) + tempIO.write_attributes(obj=tempIO.file, attributes=attr) expected_value = {'attr1': {'zarr_dtype': 'object', 'value': {'source': ".", 'path': '/dataset_1'}}} - self.assertDictEqual(testgroup.attrs.asdict(), expected_value) + self.assertDictEqual(tempIO.file.attrs.asdict(), expected_value) + tempIO.close() ########################################## # write_dataset tests: scalars ########################################## def test_write_dataset_scalar(self): a = 10 - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + 
tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', a, attributes={})) + dset = tempIO.file['test_dataset'] self.assertTupleEqual(dset.shape, (1,)) self.assertEqual(dset[()], a) + tempIO.close() def test_write_dataset_string(self): a = 'test string' - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', a, attributes={})) + dset = tempIO.file['test_dataset'] self.assertTupleEqual(dset.shape, (1,)) self.assertEqual(dset[()], a) + tempIO.close() ########################################## # write_dataset tests: lists ########################################## def test_write_dataset_list(self): a = np.arange(30).reshape(5, 2, 3) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a.tolist(), attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', a.tolist(), attributes={})) + dset = tempIO.file['test_dataset'] self.assertTrue(np.all(dset[:] == a)) + tempIO.close() def test_write_dataset_list_chunked(self): a = ZarrDataIO(np.arange(30).reshape(5, 2, 3), chunks=(1, 1, 3)) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', a, attributes={})) + dset = tempIO.file['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.chunks, (1, 1, 3)) + tempIO.close() def test_write_dataset_list_fillvalue(self): a = ZarrDataIO(np.arange(20).reshape(5, 4), fillvalue=-1) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', a, attributes={})) + dset = tempIO.file['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertEqual(dset.fill_value, -1) + tempIO.close() @unittest.skipIf(DISABLE_ZARR_COMPRESSION_TESTS, 'Skip test due to numcodec compressor not available') def test_write_dataset_list_compress(self): compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE) a = ZarrDataIO(np.arange(30).reshape(5, 2, 3), compressor=compressor) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', a, attributes={})) + dset = tempIO.file['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) self.assertTrue(dset.compressor == compressor) + tempIO.close() @unittest.skipIf(DISABLE_ZARR_COMPRESSION_TESTS, 'Skip test due to numcodec compressor not available') def test_write_dataset_list_compress_and_filter(self): @@ -576,19 +667,25 @@ def test_write_dataset_list_compress_and_filter(self): a = ZarrDataIO(np.arange(30, dtype='i4').reshape(5, 2, 3), compressor=compressor, filters=filters) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', a, attributes={})) + dset = tempIO.file['test_dataset'] self.assertTrue(np.all(dset[:] == a.data)) 
self.assertTrue(dset.compressor == compressor) self.assertListEqual(dset.filters, filters) + tempIO.close() ########################################## # write_dataset tests: Iterable ########################################## def test_write_dataset_iterable(self): - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', range(10), attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', range(10), attributes={})) + dset = tempIO.file['test_dataset'] self.assertListEqual(dset[:].tolist(), list(range(10))) + tempIO.close() ############################################## # write_dataset tests: compound data tables @@ -600,10 +697,13 @@ def test_write_structured_array_table(self): data['b'][1] = 0.1 dt = [{'name': 'a', 'dtype': 'int32', 'doc': 'a column'}, {'name': 'b', 'dtype': 'float64', 'doc': 'b column'}] - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', data, attributes={}, dtype=dt)) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', data, attributes={}, dtype=dt)) + dset = tempIO.file['test_dataset'] self.assertEqual(dset['a'].tolist(), data['a'].tolist()) self.assertEqual(dset['b'].tolist(), data['b'].tolist()) + tempIO.close() def test_write_nested_structured_array_table(self): b_cmpd_dt = np.dtype([('c', np.int32), ('d', np.float64)]) @@ -616,11 +716,14 @@ def test_write_nested_structured_array_table(self): {'name': 'd', 'dtype': 'float64', 'doc': 'd column'}] dt = [{'name': 'a', 'dtype': 'int32', 'doc': 'a column'}, {'name': 'b', 'dtype': b_dt, 'doc': 'b column'}] - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', data, attributes={}, dtype=dt)) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', data, attributes={}, dtype=dt)) + dset = tempIO.file['test_dataset'] # Test that all elements match. 
dset return np.void types so we just compare strings for simplicity for i in range(10): self.assertEqual(str(dset[i]), str(data[i])) + tempIO.close() ############################################# # write_dataset tests: data chunk iterator @@ -629,10 +732,13 @@ def test_write_dataset_iterable_multidimensional_array(self): a = np.arange(30).reshape(5, 2, 3) aiter = iter(a) daiter = DataChunkIterator.from_iterable(aiter, buffer_size=2) - self.io.write_dataset(parent=self.f, - builder=DatasetBuilder('test_dataset', daiter, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(parent=tempIO.file, + builder=DatasetBuilder('test_dataset', daiter, attributes={})) + dset = tempIO.file['test_dataset'] self.assertListEqual(dset[:].tolist(), a.tolist()) + tempIO.close() def test_write_dataset_iterable_multidimensional_array_compression(self): a = np.arange(30).reshape(5, 2, 3) @@ -641,17 +747,23 @@ def test_write_dataset_iterable_multidimensional_array_compression(self): compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE) wrapped_daiter = ZarrDataIO(data=daiter, compressor=compressor) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_daiter, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', wrapped_daiter, attributes={})) + dset = tempIO.file['test_dataset'] self.assertEqual(dset.shape, a.shape) self.assertListEqual(dset[:].tolist(), a.tolist()) self.assertTrue(dset.compressor == compressor) + tempIO.close() def test_write_dataset_data_chunk_iterator(self): dci = DataChunkIterator(data=np.arange(10), buffer_size=2) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', dci, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', dci, attributes={})) + dset = tempIO.file['test_dataset'] self.assertListEqual(dset[:].tolist(), list(range(10))) + tempIO.close() def test_write_dataset_data_chunk_iterator_with_compression(self): dci = DataChunkIterator(data=np.arange(10), buffer_size=2) @@ -659,11 +771,14 @@ def test_write_dataset_data_chunk_iterator_with_compression(self): wrapped_dci = ZarrDataIO(data=dci, compressor=compressor, chunks=(2,)) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_dci, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', wrapped_dci, attributes={})) + dset = tempIO.file['test_dataset'] self.assertListEqual(dset[:].tolist(), list(range(10))) self.assertTrue(dset.compressor == compressor) self.assertEqual(dset.chunks, (2,)) + tempIO.close() def test_pass_through_of_recommended_chunks(self): @@ -674,84 +789,113 @@ def recommended_chunk_shape(self): compressor = Blosc(cname='zstd', clevel=3, shuffle=Blosc.BITSHUFFLE) wrapped_dci = ZarrDataIO(data=dci, compressor=compressor) - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', wrapped_dci, attributes={})) - dset = self.f['test_dataset'] + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', wrapped_dci, attributes={})) + dset = tempIO.file['test_dataset'] self.assertEqual(dset.chunks, (5, 1, 1)) self.assertTrue(dset.compressor == compressor) + tempIO.close() 
############################################# # Copy/Link h5py.Dataset object ############################################# def test_link_zarr_dataset_input(self): dset = DatasetBuilder('test_dataset', np.arange(10), attributes={}) - self.io.write_dataset(self.f, builder=dset) - softlink = DatasetBuilder('test_softlink', self.f['test_dataset'], attributes={}) - self.io.write_dataset(self.f, builder=softlink) - tempf = zarr.open(store=self.path, mode='r') + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, builder=dset) + softlink = DatasetBuilder('test_softlink', tempIO.file['test_dataset'], attributes={}) + tempIO.write_dataset(tempIO.file, builder=softlink) + tempf = zarr.open(store=self.store, mode='r') expected_link = {'name': 'test_softlink', 'path': '/test_dataset', - 'source': os.path.abspath(self.path)} + 'source': os.path.abspath(self.store_path)} self.assertEqual(len(tempf.attrs['zarr_link']), 1) self.assertDictEqual(tempf.attrs['zarr_link'][0], expected_link) + tempIO.close() def test_copy_zarr_dataset_input(self): - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) - self.io.write_dataset(self.f, - DatasetBuilder('test_copy', self.f['test_dataset'], attributes={}), - link_data=False) + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', np.arange(10), attributes={})) + tempIO.write_dataset(tempIO.file, + DatasetBuilder('test_copy', tempIO.file['test_dataset'], attributes={}), + link_data=False) # NOTE: In HDF5 this would be a HardLink. Since Zarr does not support links, this will be a copy instead. - self.assertListEqual(self.f['test_dataset'][:].tolist(), - self.f['test_copy'][:].tolist()) + self.assertListEqual(tempIO.file['test_dataset'][:].tolist(), + tempIO.file['test_copy'][:].tolist()) + tempIO.close() def test_link_dataset_zarrdataio_input(self): - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) - self.io.write_dataset(self.f, DatasetBuilder('test_softlink', - ZarrDataIO(data=self.f['test_dataset'], - link_data=True), - attributes={})) - tempf = zarr.open(store=self.path, mode='r') + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', np.arange(10), attributes={})) + tempIO.write_dataset(tempIO.file, + DatasetBuilder( + 'test_softlink', + ZarrDataIO(data=tempIO.file['test_dataset'], link_data=True), + attributes={}) + ) + tempf = zarr.open(self.store, mode='r') expected_link = {'name': 'test_softlink', 'path': '/test_dataset', - 'source': os.path.abspath(self.path)} + 'source': os.path.abspath(self.store_path)} self.assertEqual(len(tempf.attrs['zarr_link']), 1) self.assertDictEqual(tempf.attrs['zarr_link'][0], expected_link) + tempIO.close() def test_copy_dataset_zarrdataio_input(self): - self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) - self.io.write_dataset(self.f, - DatasetBuilder('test_copy', - ZarrDataIO(data=self.f['test_dataset'], - link_data=False), # Force dataset copy - attributes={}), - link_data=True) # Make sure the default behavior is set to link the data + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + tempIO.write_dataset(tempIO.file, DatasetBuilder('test_dataset', np.arange(10), attributes={})) + tempIO.write_dataset(tempIO.file, + DatasetBuilder('test_copy', + ZarrDataIO(data=tempIO.file['test_dataset'], + link_data=False), # Force dataset 
copy + attributes={}), + link_data=True) # Make sure the default behavior is set to link the data # NOTE: In HDF5 this would be a HardLink. Since Zarr does not support links, this will be a copy instead. - self.assertListEqual(self.f['test_dataset'][:].tolist(), - self.f['test_copy'][:].tolist()) + self.assertListEqual(tempIO.file['test_dataset'][:].tolist(), + tempIO.file['test_copy'][:].tolist()) + tempIO.close() def test_list_fill_empty(self): - dset = self.io.__list_fill__(self.f, 'empty_dataset', [], options={'dtype': int, 'io_settings': {}}) + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() + dset = tempIO.__list_fill__(tempIO.file, 'empty_dataset', [], options={'dtype': int, 'io_settings': {}}) self.assertTupleEqual(dset.shape, (0,)) + tempIO.close() def test_list_fill_empty_no_dtype(self): + tempIO = ZarrIO(self.store, mode='w') + tempIO.open() with self.assertRaisesRegex(Exception, r"cannot add empty_dataset to / - could not determine type"): - self.io.__list_fill__(self.f, 'empty_dataset', []) + tempIO.__list_fill__(tempIO.file, 'empty_dataset', []) + tempIO.close() -class TestExportZarrToZarr(TestCase): - """Test exporting Zarr to Zarr.""" +class BaseTestExportZarrToZarr(BaseZarrWriterTestCase): + """ + Test exporting Zarr to Zarr. + + In contrast to the normal BaseZarrWriterTestCase, here the store and store_path + variable require to be a list of length 4. + + :ivar store: List of 4 Zarr data stores to use. + :type store: List of 4 store values. Stores are the same as the `path`` + parameter of :py:class:`~hdmf_zarr.backend.ZarrIO.__init__ ` + :ivar store_path: The paths to the Zarr file defined by the stores + """ def setUp(self): - self.paths = [ + self.store = [ get_temp_filepath(), get_temp_filepath(), get_temp_filepath(), get_temp_filepath() ] - - def tearDown(self): - for p in self.paths: - if os.path.exists(p): - shutil.rmtree(p) + self.store_path = self.store def test_basic(self): """Test that exporting a written container works between Zarr and Zarr.""" @@ -759,19 +903,19 @@ def test_basic(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[1], mode='w') as export_io: export_io.export(src_io=read_io) - self.assertTrue(os.path.exists(self.paths[1])) - self.assertEqual(foofile.container_source, self.paths[0]) + self.assertTrue(os.path.exists(self.store_path[1])) + self.assertEqual(foofile.container_source, self.store_path[0]) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[1], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() - self.assertEqual(read_foofile.container_source, self.paths[1]) + self.assertEqual(read_foofile.container_source, self.store_path[1]) self.assertContainerEqual(foofile, read_foofile, ignore_hdmf_attrs=True) def test_basic_container(self): @@ -780,20 +924,20 @@ def test_basic_container(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], 
manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: export_io.export(src_io=read_io, container=read_foofile) - self.assertTrue(os.path.exists(self.paths[1])) - self.assertEqual(foofile.container_source, self.paths[0]) + self.assertTrue(os.path.exists(self.store_path[1])) + self.assertEqual(foofile.container_source, self.store_path[0]) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[1], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() - self.assertEqual(read_foofile.container_source, self.paths[1]) + self.assertEqual(read_foofile.container_source, self.store_path[1]) self.assertContainerEqual(foofile, read_foofile, ignore_hdmf_attrs=True) def test_container_part(self): @@ -802,12 +946,12 @@ def test_container_part(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: msg = ("The provided container must be the root of the hierarchy of the source used to read the " "container.") with self.assertRaisesWith(ValueError, msg): @@ -819,11 +963,11 @@ def test_container_unknown(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[1], mode='w') as export_io: dummy_file = FooFile(buckets=[]) msg = "The provided container must have been read by the provided src_io." 
with self.assertRaisesWith(ValueError, msg): @@ -835,18 +979,18 @@ def test_cache_spec_disabled(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile, cache_spec=False) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: export_io.export( src_io=read_io, container=read_foofile, cache_spec=False) - self.assertFalse(os.path.exists(os.path.join(self.paths[1], 'specifications'))) + self.assertFalse(os.path.exists(os.path.join(self.store_path[1], 'specifications'))) def test_cache_spec_enabled(self): """Test that exporting with cache_spec works.""" @@ -854,18 +998,20 @@ def test_cache_spec_enabled(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: export_io.export( src_io=read_io, container=read_foofile, cache_spec=True) - self.assertTrue(os.path.exists(os.path.join(self.paths[1], 'specifications'))) + + with zarr.open(self.store[1], mode='r') as zarr_io: + self.assertTrue('specifications' in zarr_io.keys()) def test_soft_link_group(self): """ @@ -875,17 +1021,17 @@ def test_soft_link_group(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket], foo_link=foo1) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[1], mode='w') as export_io: export_io.export(src_io=read_io, write_args=dict(link_data=False)) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[1], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile2 = read_io.read() # make sure the linked group is within the same file - self.assertEqual(read_foofile2.foo_link.container_source, self.paths[1]) - zarr_linkspec1 = zarr.open(self.paths[0])['links'].attrs.asdict()['zarr_link'][0] - zarr_linkspec2 = zarr.open(self.paths[1])['links'].attrs.asdict()['zarr_link'][0] + self.assertEqual(read_foofile2.foo_link.container_source, self.store_path[1]) + zarr_linkspec1 = zarr.open(self.store_path[0])['links'].attrs.asdict()['zarr_link'][0] + zarr_linkspec2 = zarr.open(self.store_path[1])['links'].attrs.asdict()['zarr_link'][0] self.assertEqual(zarr_linkspec1.pop('source'), ".") self.assertEqual(zarr_linkspec2.pop('source'), ".") 
self.assertDictEqual(zarr_linkspec1, zarr_linkspec2) @@ -898,19 +1044,19 @@ def test_soft_link_dataset(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket], foofile_data=foo1.my_data) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile, link_data=True) print ("WRITE DONE") - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[1], mode='w') as export_io: export_io.export(src_io=read_io, write_args=dict(link_data=False)) - print(zarr.open(self.paths[0]).tree()) - print(zarr.open(self.paths[1]).tree()) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + print(zarr.open(self.store_paths[0]).tree()) + print(zarr.open(self.store_paths[1]).tree()) + with ZarrIO(self.store_paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile2 = read_io.read() # make sure the linked dataset is within the same file - print(open(self.paths[1]+"/buckets/bucket1/foo_holder/foo1/.zattrs", 'r').read()) - self.assertEqual(read_foofile2.foofile_data.path, self.paths[1]) + print(open(self.source_paths[1]+"/buckets/bucket1/foo_holder/foo1/.zattrs", 'r').read()) + self.assertEqual(read_foofile2.foofile_data.path, self.source_paths[1]) """ def test_external_link_group(self): @@ -922,33 +1068,33 @@ def test_external_link_group(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) # Create File 1 with the full data - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as read_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as read_io: read_io.write(foofile) # Create file 2 with an external link to File 1 manager = get_foo_buildmanager() - with ZarrIO(self.paths[0], manager=manager, mode='r') as read_io: + with ZarrIO(self.store_paths[0], manager=manager, mode='r') as read_io: read_foofile = read_io.read() # make external link to existing group foofile2 = FooFile(foo_link=read_foofile.buckets['bucket1'].foos['foo1']) print("-------------------Write File 2----------------------------") - with ZarrIO(self.paths[1], manager=manager, mode='w') as write_io: + with ZarrIO(self.store_paths[1], manager=manager, mode='w') as write_io: write_io.write(foofile2) - self.assertDictEqual(zarr.open(self.paths[1])['links'].attrs.asdict(), + self.assertDictEqual(zarr.open(self.store_paths[1])['links'].attrs.asdict(), {'zarr_link': [{'name': 'foo_link', 'path': '/buckets/bucket1/foo_holder/foo1', - 'source': self.paths[0]}]}) + 'source': self.source_paths[0]}]}) # Export File 2 to a new File 3 and make sure the external link from File 2 is being preserved - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: - with ZarrIO(self.paths[2], mode='w') as export_io: + with ZarrIO(self.store_paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[2], mode='w') as export_io: print("-------------------Write File 3----------------------------") export_io.export(src_io=read_io) #print() - print(zarr.open(self.paths[1])['links'].attrs.asdict()) - print(zarr.open(self.paths[2])['links'].attrs.asdict()) - with 
ZarrIO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: + print(zarr.open(self.store_paths[1])['links'].attrs.asdict()) + print(zarr.open(self.store_paths[2])['links'].attrs.asdict()) + with ZarrIO(self.store_paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile2 = read_io.read() # make sure the linked group is read from the first file - self.assertEqual(read_foofile2.foo_link.container_source, self.paths[0]) + self.assertEqual(read_foofile2.foo_link.container_source, self.source_paths[0]) """ def test_external_link_dataset(self): @@ -958,24 +1104,22 @@ def test_external_link_dataset(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket], foofile_data=[1, 2, 3]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) manager = get_foo_buildmanager() - with ZarrIO(self.paths[0], manager=manager, mode='r') as read_io: + with ZarrIO(self.store_paths[0], manager=manager, mode='r') as read_io: read_foofile = read_io.read() # make external link to existing dataset foofile2 = FooFile(foofile_data=read_foofile.foofile_data) - with ZarrIO(self.paths[1], manager=manager, mode='w') as write_io: + with ZarrIO(self.store_paths[1], manager=manager, mode='w') as write_io: write_io.write(foofile2) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: - self.ios.append(read_io) # track IO objects for tearDown - with ZarrIO(self.paths[2], mode='w') as export_io: + with ZarrIO(self.store_paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[2], mode='w') as export_io: export_io.export(src_io=read_io) - with ZarrIO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: - self.ios.append(read_io) # track IO objects for tearDown + with ZarrIO(self.store_paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile2 = read_io.read() # make sure the linked dataset is read from the first file - self.assertEqual(read_foofile2.foofile_data.file.filename, self.paths[0]) + self.assertEqual(read_foofile2.foofile_data.file.filename, self.source_paths[0]) """ def test_external_link_link(self): @@ -985,29 +1129,29 @@ def test_external_link_link(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) manager = get_foo_buildmanager() - with ZarrIO(self.paths[0], manager=manager, mode='r') as read_io: + with ZarrIO(self.store_paths[0], manager=manager, mode='r') as read_io: read_foofile = read_io.read() # make external link to existing group foofile2 = FooFile(foo_link=read_foofile.buckets['bucket1'].foos['foo1']) - with ZarrIO(self.paths[1], manager=manager, mode='w') as write_io: + with ZarrIO(self.store_paths[1], manager=manager, mode='w') as write_io: write_io.write(foofile2) manager = get_foo_buildmanager() - with ZarrIO(self.paths[1], manager=manager, mode='r') as read_io: + with ZarrIO(self.store_paths[1], manager=manager, mode='r') as read_io: read_foofile2 = read_io.read() # make external link to external link foofile3 = FooFile(foo_link=read_foofile2.foo_link) - 
with ZarrIO(self.paths[2], manager=manager, mode='w') as write_io: + with ZarrIO(self.store_paths[2], manager=manager, mode='w') as write_io: write_io.write(foofile3) - with ZarrIO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: - with ZarrIO(self.paths[3], mode='w') as export_io: + with ZarrIO(self.store_paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[3], mode='w') as export_io: export_io.export(src_io=read_io) - with ZarrIO(self.paths[3], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[3], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile3 = read_io.read() # make sure the linked group is read from the first file - self.assertEqual(read_foofile3.foo_link.container_source, self.paths[0]) + self.assertEqual(read_foofile3.foo_link.container_source, self.source_paths[0]) """ def test_attr_reference(self): @@ -1018,20 +1162,20 @@ def test_attr_reference(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket], foo_ref_attr=foo1) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as read_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as read_io: read_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[1], mode='w') as export_io: export_io.export(src_io=read_io, write_args=dict(link_data=False)) - #with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + #with ZarrIO(self.store_paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: # read_foofile2 = read_io.read() #self.assertTupleEqual(ZarrIO.get_zarr_paths(read_foofile2.foo_ref_attr.my_data), - # (self.paths[1], '/buckets/bucket1/foo_holder/foo1/my_data')) + # (self.source_paths[1], '/buckets/bucket1/foo_holder/foo1/my_data')) # make sure the attribute reference resolves to the container within the same file #self.assertIs(read_foofile2.foo_ref_attr, read_foofile2.buckets['bucket1'].foos['foo1']) - expected_ref = {'value': {'path': '/buckets/bucket1/foo_holder/foo1', 'source': self.paths[1]}, + expected_ref = {'value': {'path': '/buckets/bucket1/foo_holder/foo1', 'source': self.source_paths[1]}, 'zarr_dtype': 'object'} - real_ref = zarr.open(self.paths[1]).attrs['foo_ref_attr'] + real_ref = zarr.open(self.store_paths[1]).attrs['foo_ref_attr'] self.assertDictEqual(real_ref, expected_ref) """ @@ -1041,25 +1185,25 @@ def test_pop_data(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() read_foofile.remove_bucket('bucket1') # remove child group - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: export_io.export(src_io=read_io, container=read_foofile) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[1], 
manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile2 = read_io.read() # make sure the read foofile has no buckets self.assertDictEqual(read_foofile2.buckets, {}) # check that file size of file 2 is smaller - dirsize1 = total_directory_size(self.paths[0]) - dirsize2 = total_directory_size(self.paths[1]) + dirsize1 = total_size(self.store_path[0]) + dirsize2 = total_size(self.store_path[1]) self.assertTrue(dirsize1 > dirsize2) def test_pop_linked_group(self): @@ -1068,14 +1212,14 @@ def test_pop_linked_group(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket], foo_link=foo1) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() read_foofile.buckets['bucket1'].remove_foo('foo1') # remove child group - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: msg = ("links (links): Linked Foo 'foo1' has no parent. Remove the link or ensure the linked " "container is added properly.") with self.assertRaisesWith(OrphanContainerBuildError, msg): @@ -1091,9 +1235,9 @@ def test_append_data(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile = read_io.read() # create a foo with link to existing dataset my_data, add the foo to new foobucket # this should make a soft link within the exported file @@ -1108,9 +1252,9 @@ def test_append_data(self): read_foofile.foofile_data = foo2.my_data # also add reference from foofile to new foo2 read_foofile.foo_ref_attr = foo2 - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store_paths[1], mode='w') as export_io: export_io.export(src_io=read_io, container=read_foofile) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with ZarrIO(self.store_paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: read_foofile2 = read_io.read() # test new soft link to dataset in file self.assertIs(read_foofile2.buckets['bucket1'].foos['foo1'].my_data, @@ -1121,8 +1265,8 @@ def test_append_data(self): self.assertIs(read_foofile2.buckets['bucket1'].foos['foo1'].my_data, read_foofile2.foofile_data) # test new attribute reference to new group in file self.assertIs(read_foofile2.foo_ref_attr, read_foofile2.buckets['bucket2'].foos['foo2']) - #with File(self.paths[1], 'r') as f: - # self.assertEqual(f['foofile_data'].file.filename, self.paths[1]) + #with File(self.store_paths[1], 'r') as f: + # self.assertEqual(f['foofile_data'].file.filename, self.store_paths[1]) # self.assertIsInstance(f.attrs['foo_ref_attr'], h5py.Reference) """ @@ -1133,15 +1277,15 @@ def test_append_external_link_data(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - 
with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) foofile2 = FooFile(buckets=[]) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile2) manager = get_foo_buildmanager() - with ZarrIO(self.paths[0], manager=manager, mode='r') as read_io1: + with ZarrIO(self.store_paths[0], manager=manager, mode='r') as read_io1: read_foofile1 = read_io1.read() - with ZarrIO(self.paths[1], manager=manager, mode='r') as read_io2: + with ZarrIO(self.store_paths[1], manager=manager, mode='r') as read_io2: read_foofile2 = read_io2.read() # create a foo with link to existing dataset my_data (not in same file), add the foo to new foobucket # this should make an external link within the exported file @@ -1151,19 +1295,18 @@ def test_append_external_link_data(self): # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset # this should make an external link within the exported file read_foofile2.foofile_data = foo2.my_data - with ZarrIO(self.paths[2], mode='w') as export_io: + with ZarrIO(self.store_paths[2], mode='w') as export_io: export_io.export(src_io=read_io2, container=read_foofile2) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: - self.ios.append(read_io1) # track IO objects for tearDown + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: read_foofile3 = read_io1.read() - with ZarrIO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + with ZarrIO(self.store_paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: read_foofile4 = read_io2.read() self.assertEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) self.assertEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) - #with File(self.paths[2], 'r') as f: - # self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[0]) - # self.assertEqual(f['foofile_data'].file.filename, self.paths[0]) + #with File(self.source_paths[2], 'r') as f: + # self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.source_paths[0]) + # self.assertEqual(f['foofile_data'].file.filename, self.souce_paths[0]) # self.assertIsInstance(f.get('buckets/bucket2/foo_holder/foo2/my_data', getlink=True), # h5py.ExternalLink) # self.assertIsInstance(f.get('foofile_data', getlink=True), h5py.ExternalLink) @@ -1176,15 +1319,15 @@ def test_append_external_link_copy_data(self): foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) foofile2 = FooFile(buckets=[]) - with ZarrIO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store_paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile2) manager = get_foo_buildmanager() - with ZarrIO(self.paths[0], manager=manager, mode='r') as read_io1: + with ZarrIO(self.store_paths[0], manager=manager, mode='r') as read_io1: 
read_foofile1 = read_io1.read() - with ZarrIO(self.paths[1], manager=manager, mode='r') as read_io2: + with ZarrIO(self.store_paths[1], manager=manager, mode='r') as read_io2: read_foofile2 = read_io2.read() # create a foo with link to existing dataset my_data (not in same file), add the foo to new foobucket # this would normally make an external link but because link_data=False, data will be copied @@ -1194,20 +1337,20 @@ def test_append_external_link_copy_data(self): # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset # this would normally make an external link but because link_data=False, data will be copied read_foofile2.foofile_data = foo2.my_data - with ZarrIO(self.paths[2], mode='w') as export_io: + with ZarrIO(self.store_paths[2], mode='w') as export_io: export_io.export(src_io=read_io2, container=read_foofile2, write_args={'link_data': False}) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: + with ZarrIO(self.store_paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: read_foofile3 = read_io1.read() - with ZarrIO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + with ZarrIO(self.store_paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: read_foofile4 = read_io2.read() # check that file can be read self.assertNotEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) self.assertNotEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) self.assertNotEqual(read_foofile4.foofile_data, read_foofile4.buckets['bucket2'].foos['foo2'].my_data) - # with File(self.paths[2], 'r') as f: - # self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[2]) - # self.assertEqual(f['foofile_data'].file.filename, self.paths[2]) + # with File(self.source_paths[2], 'r') as f: + # self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.source_paths[2]) + # self.assertEqual(f['foofile_data'].file.filename, self.source_paths[2]) """ def test_export_dset_refs(self): @@ -1220,17 +1363,17 @@ def test_export_dset_refs(self): bazs.append(Baz(name='baz%d' % i)) baz_data = BazData(name='baz_data1', data=bazs) bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_data=baz_data) - with ZarrIO(self.paths[0], manager=_get_baz_manager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=_get_baz_manager(), mode='w') as write_io: write_io.write(bucket) - with ZarrIO(self.paths[0], manager=_get_baz_manager(), mode='r') as read_io: + with ZarrIO(self.store_paths[0], manager=_get_baz_manager(), mode='r') as read_io: read_bucket1 = read_io.read() # NOTE: reference IDs might be the same between two identical files # adding a Baz with a smaller name should change the reference IDs on export new_baz = Baz(name='baz000') read_bucket1.add_baz(new_baz) - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store_paths[1], mode='w') as export_io: export_io.export(src_io=read_io, container=read_bucket1) - with ZarrIO(self.paths[1], manager=_get_baz_manager(), mode='r') as read_io: + with ZarrIO(self.store_paths[1], manager=_get_baz_manager(), mode='r') as read_io: read_bucket2 = read_io.read() # remove and check the appended child, then compare the read container with the original read_new_baz = read_bucket2.remove_baz('baz000') @@ -1254,17 +1397,17 @@ def test_export_cpd_dset_refs(self): baz_pairs.append((i, b)) baz_cpd_data 
= BazCpdData(name='baz_cpd_data1', data=baz_pairs) bucket = BazBucket(name='bucket1', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data) - with ZarrIO(self.paths[0], manager=_get_baz_manager(), mode='w') as write_io: + with ZarrIO(self.store_paths[0], manager=_get_baz_manager(), mode='w') as write_io: write_io.write(bucket) - with ZarrIO(self.paths[0], manager=_get_baz_manager(), mode='r') as read_io: + with ZarrIO(self.store_paths[0], manager=_get_baz_manager(), mode='r') as read_io: read_bucket1 = read_io.read() # NOTE: reference IDs might be the same between two identical files # adding a Baz with a smaller name should change the reference IDs on export new_baz = Baz(name='baz000') read_bucket1.add_baz(new_baz) - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store_paths[1], mode='w') as export_io: export_io.export(src_io=read_io, container=read_bucket1) - with ZarrIO(self.paths[1], manager=_get_baz_manager(), mode='r') as read_io: + with ZarrIO(self.store_paths[1], manager=_get_baz_manager(), mode='r') as read_io: read_bucket2 = read_io.read() # remove and check the appended child, then compare the read container with the original read_new_baz = read_bucket2.remove_baz(new_baz.name) @@ -1282,7 +1425,7 @@ def test_non_manager_container(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) class OtherIO(HDMFIO): @@ -1300,7 +1443,7 @@ def close(self): pass with OtherIO() as read_io: - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: msg = 'When a container is provided, src_io must have a non-None manager (BuildManager) property.' with self.assertRaisesWith(ValueError, msg): export_io.export(src_io=read_io, container=foofile, write_args={'link_data': False}) @@ -1311,7 +1454,7 @@ def test_non_Zarr_src_link_data_true(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) class OtherIO(HDMFIO): @@ -1332,7 +1475,7 @@ def close(self): pass with OtherIO(manager=get_foo_buildmanager()) as read_io: - with ZarrIO(self.paths[1], mode='w') as export_io: + with ZarrIO(self.store[1], mode='w') as export_io: msg = "Cannot export from non-Zarr backend OtherIO to Zarr with write argument link_data=True." with self.assertRaisesWith(UnsupportedOperation, msg): export_io.export(src_io=read_io, container=foofile) @@ -1343,11 +1486,11 @@ def test_wrong_mode(self): foobucket = FooBucket('bucket1', [foo1]) foofile = FooFile(buckets=[foobucket]) - with ZarrIO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + with ZarrIO(self.store[0], manager=get_foo_buildmanager(), mode='w') as write_io: write_io.write(foofile) - with ZarrIO(self.paths[0], mode='r') as read_io: - with ZarrIO(self.paths[1], mode='a') as export_io: - msg = "Cannot export to file %s in mode 'a'. Please use mode 'w'." % self.paths[1] + with ZarrIO(self.store[0], mode='r') as read_io: + with ZarrIO(self.store[1], mode='a') as export_io: + msg = "Cannot export to file %s in mode 'a'. Please use mode 'w'." 
% self.store_path[1]
            with self.assertRaisesWith(UnsupportedOperation, msg):
                export_io.export(src_io=read_io)

diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py
index 76282107..9f7f0439 100644
--- a/tests/unit/test_io_convert.py
+++ b/tests/unit/test_io_convert.py
@@ -2,19 +2,35 @@
 Module for testing conversion of data between different I/O backends
 To reduce the amount of code needed, the tests use a series of mixin classes
 to construct a test case:
-- MixinTestCaseConvert is the base mixin class for conversion tests and
-  requires that the setUpContainer and roundtripExportContainer functions
+
+- ``MixinTestCaseConvert`` is the base mixin class for conversion tests and
+  requires that the ``setUpContainer`` and ``roundtripExportContainer`` functions
   are defined. The setUpContainer defines the container (and hence the problem case)
-  to be written to file. And the rountripExportContainer defined the process
+  to be written to file. The roundtripExportContainer defines the process
   for writing, exporting, and then reading the container again.
-- TestXYZContainerMixin classes define the setUpContainer function
-- TestX1toX2Mixin defines the rountripExportContainer process
-- TestCase is the base test class for HDMF
+- ``TestXYZContainerMixin`` classes define the ``setUpContainer`` function
+- ``TestX1toX2Mixin`` defines the ``roundtripExportContainer`` process
+- ``TestCase`` is the base test class for HDMF
+
 A test case is then constructed by defining a class that inherits from the
-corresponding (usually 4) base classes, a mixin that define setUpContainer,
-a mixin that define roundtripExportContainer, MixinTestCaseConvert, and TestCase.
+corresponding (usually 4) base classes:
+
+1. a mixin that defines ``setUpContainer``,
+2. a mixin that defines ``roundtripExportContainer``,
+3. ``MixinTestCaseConvert``, and
+4. ``TestCase``.
+
 I.e., even though a particular test class may look empty, it is the combination
-of the mixin classes that creates the particular test problem.
+of the mixin classes that creates the particular test problem. Many of the mixin
+classes also define additional class variables that allow child classes to further
+customize the behavior of the mixin.
+
+.. note::
+
+   The mixin classes should not be instantiated and their class variables should not
+   be modified directly, because each mixin only defines partial behavior and
+   modifying it would affect all downstream tests.
+   Mixin classes should always be used through inheritance.
 """
 import os
 import shutil
@@ -34,28 +50,72 @@
                                FooFile,
                                get_foo_buildmanager)
 
+from zarr.storage import (DirectoryStore,
+                          TempStore,
+                          NestedDirectoryStore)
+
 
 class MixinTestCaseConvert(metaclass=ABCMeta):
     """
     Mixin class used to define the basic structure for a conversion test.
+
+    To implement a test case using this mixin we need to implement the abstract methods
+    ``setUpContainer`` and ``roundtripExportContainer``. The behavior of the mixin can
+    then be further customized via the class variables: IGNORE_NAME, IGNORE_HDMF_ATTRS,
+    IGNORE_STRING_TO_BYTE, WRITE_PATHS, EXPORT_PATHS.
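+
+    As an illustrative sketch (the class name here is hypothetical and not a test defined
+    in this module), a concrete test case combines a container mixin, a conversion mixin,
+    this mixin, and ``TestCase``, and may override the class variables, e.g.::
+
+        class TestHDF5ToZarrExample(MixinTestDynamicTableContainer,  # defines setUpContainer
+                                    MixinTestHDF5ToZarr,             # defines roundtripExportContainer
+                                    MixinTestCaseConvert,
+                                    TestCase):
+            TABLE_TYPE = 0
+            EXPORT_PATHS = [None, DirectoryStore('test_export_example.zarr')]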
+ """ IGNORE_NAME = False + """ + Bool parameter passed to assertContainerEqual (False) + """ + IGNORE_HDMF_ATTRS = False + """ + Bool parameter passed to assertContainerEqual (False) + """ + IGNORE_STRING_TO_BYTE = False + """ + Bool parameter passed to assertContainerEqual (False) + """ + + WRITE_PATHS = [None, ] + """ + List of paths to which to write files to as part of ``test_export_roundtrip``, + which passes the values to ``roundtripContainer``. The specific definition + of the individual paths depends on the backend used for writing in ``roundtripContainer``. + E.g., if :py:class:`~hdmf.backends.h5tools.HDF5IO` is used then the paths must be strings, + and when :py:class:`~hdmf_zarr.backend.ZarrIO` is used then paths may be strings or + supported ``zarr.storage`` backend objects, e.g., a ``zarr.storage.DirectoryStore``. + A value of None as part of list means to use the default filename for write. + (Default=[None, ]) + """ + + EXPORT_PATHS = [None, ] + """ + List of paths to which to export files to as part of ``test_export_roundtrip``, + which passes the values to ``roundtripContainer``. The specific definition + of the individual paths depends on the backend used for writing in ``roundtripContainer``. + E.g., if :py:class:`~hdmf.backends.h5tools.HDF5IO` is used then the paths must be strings, + and when :py:class:`~hdmf_zarr.backend.ZarrIO` is used then paths may be strings or + supported ``zarr.storage`` backend objects, e.g., a ``zarr.storage.DirectoryStore``. + A value of None as part of list means to use the default filename for export. + (Default=[None, ]) + """ def get_manager(self): raise NotImplementedError('Cannot run test unless get_manger is implemented') def setUp(self): self.__manager = self.get_manager() - self.container = self.setUpContainer() - self.container_type = self.container.__class__.__name__ - self.filename = 'test_%s.hdmf' % self.container_type - self.export_filename = 'test_export_%s.hdmf' % self.container_type - self.filenames = [self.filename, self.export_filename] + self.filenames = [] self.ios = [] def tearDown(self): + self.close_files_and_ios() + + def close_files_and_ios(self): for io in self.ios: if io is not None: io.close() @@ -65,6 +125,8 @@ def tearDown(self): shutil.rmtree(fn) else: os.remove(fn) + self.filenames = [] + self.ios = [] @abstractmethod def setUpContainer(self): @@ -72,36 +134,56 @@ def setUpContainer(self): raise NotImplementedError('Cannot run test unless setUpContainer is implemented') @abstractmethod - def roundtripExportContainer(self): + def roundtripExportContainer(self, container, write_path, export_path): """ - 1. Write the container to self.filename - 2. Export the file from 1 to self.export_filename using a new backend - 3. Read the exported container from disk - 4. Return the container read in 4 so that it can be compared with the original - Any HDMFIO backends that should remain open should be added to the self.io list - so that they can be closed on tearDown. + 1. Write the container to write_path + 2. Export the file from write_path to export_path using a new backend + 3. Read the exported container export_path from disk + 4. 
Return the container read in 3 so that it can be compared with the original.
+        Any HDMFIO backends that should remain open MUST be added to the self.io list
+        so that they can be closed by close_files_and_ios (e.g., on tearDown).
         """
         raise NotImplementedError('Cannot run test unless roundtripExportContainer is implemented')
 
     def test_export_roundtrip(self):
-        """Test that rountripping the container works"""
-        exported_container = self.roundtripExportContainer()
-        self.assertIsNotNone(str(self.container))  # added as a test to make sure printing works
-        self.assertIsNotNone(str(exported_container))
-        # make sure we get a completely new object
-        self.assertNotEqual(id(self.container), id(exported_container))
-        # the name of the root container of a file is always 'root' (see h5tools.py ROOT_NAME)
-        # thus, ignore the name of the container when comparing original container vs read container
-        self.assertContainerEqual(self.container, exported_container,
-                                  ignore_name=self.IGNORE_NAME,
-                                  ignore_hdmf_attrs=self.IGNORE_HDMF_ATTRS,
-                                  ignore_string_to_byte=self.IGNORE_STRING_TO_BYTE)
-        # TODO May need to add further asserts here
-
-
-############################################
-# HDMF Common test harness
-###########################################
+        """Test that roundtripping the container works"""
+        # determine and save the write and export paths
+        for write_path in self.WRITE_PATHS:
+            for export_path in self.EXPORT_PATHS:
+                container = self.setUpContainer()
+                container_type = container.__class__.__name__
+                if write_path is None:
+                    write_path = 'test_%s.hdmf' % container_type
+                if export_path is None:
+                    export_path = 'test_export_%s.hdmf' % container_type
+                self.filenames.append(write_path if isinstance(write_path, str) else write_path.path)
+                self.filenames.append(export_path if isinstance(export_path, str) else export_path.path)
+                # roundtrip the container
+                exported_container = self.roundtripExportContainer(
+                    container=container,
+                    write_path=write_path,
+                    export_path=export_path)
+                # assert that the roundtrip worked correctly
+                message = "Using: write_path=%s, export_path=%s" % (str(write_path), str(export_path))
+                self.assertIsNotNone(str(container), message)  # added as a test to make sure printing works
+                self.assertIsNotNone(str(exported_container), message)
+                # make sure we get a completely new object
+                self.assertNotEqual(id(container), id(exported_container), message)
+                # the name of the root container of a file is always 'root' (see h5tools.py ROOT_NAME)
+                # thus, ignore the name of the container when comparing original container vs read container
+                self.assertContainerEqual(container,
+                                          exported_container,
+                                          ignore_name=self.IGNORE_NAME,
+                                          ignore_hdmf_attrs=self.IGNORE_HDMF_ATTRS,
+                                          ignore_string_to_byte=self.IGNORE_STRING_TO_BYTE,
+                                          message=message)
+                self.close_files_and_ios()
+        # TODO: May need to add further asserts here
+
+
+##########################################################
+# Mixins for testing export between different I/O backends
+#########################################################
 class MixinTestHDF5ToZarr():
     """
     Mixin class used in conjunction with MixinTestCaseConvert to create conversion tests from HDF5 to Zarr.
@@ -109,18 +191,24 @@ class MixinTestHDF5ToZarr():
     The setUpContainer function required for the test needs to be defined separately
     (e.g., by another mixin or the test class itself).
""" + WRITE_PATHS = [None, ] + EXPORT_PATHS = [None, + DirectoryStore('test_export_DirectoryStore.zarr'), + TempStore(), + NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')] + def get_manager(self): return get_hdmfcommon_manager() - def roundtripExportContainer(self): - with HDF5IO(self.filename, manager=self.get_manager(), mode='w') as write_io: - write_io.write(self.container, cache_spec=True) + def roundtripExportContainer(self, container, write_path, export_path): + with HDF5IO(write_path, manager=self.get_manager(), mode='w') as write_io: + write_io.write(container, cache_spec=True) - with HDF5IO(self.filename, manager=self.get_manager(), mode='r') as read_io: - with ZarrIO(self.export_filename, mode='w') as export_io: + with HDF5IO(write_path, manager=self.get_manager(), mode='r') as read_io: + with ZarrIO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) - read_io = ZarrIO(self.export_filename, manager=self.get_manager(), mode='r') + read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r') self.ios.append(read_io) exportContainer = read_io.read() return exportContainer @@ -133,38 +221,81 @@ class MixinTestZarrToHDF5(): The setUpContainer function required for the test needs to be defined separately (e.g., by another mixin or the test class itself) """ + WRITE_PATHS = [None, + DirectoryStore('test_export_DirectoryStore.zarr'), + TempStore(), + NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')] + EXPORT_PATHS = [None, ] + def get_manager(self): return get_hdmfcommon_manager() - def roundtripExportContainer(self): - with ZarrIO(self.filename, manager=self.get_manager(), mode='w') as write_io: - write_io.write(self.container, cache_spec=True) + def roundtripExportContainer(self, container, write_path, export_path): + with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io: + write_io.write(container, cache_spec=True) - with ZarrIO(self.filename, manager=self.get_manager(), mode='r') as read_io: - with HDF5IO(self.export_filename, mode='w') as export_io: + with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io: + with HDF5IO(export_path, mode='w') as export_io: export_io.export(src_io=read_io, write_args={'link_data': False}) - read_io = HDF5IO(self.export_filename, manager=self.get_manager(), mode='r') + read_io = HDF5IO(export_path, manager=self.get_manager(), mode='r') + self.ios.append(read_io) + exportContainer = read_io.read() + return exportContainer + + +class MixinTestZarrToZarr(): + """ + Mixin class used in conjunction with MixinTestCaseConvert to create conversion tests from Zarr to Zarr. + This class only defines the roundtripExportContainer and get_manager functions for the test. 
+ The setUpContainer function required for the test needs to be defined separately + (e.g., by another mixin or the test class itself) + """ + WRITE_PATHS = [None, + DirectoryStore('test_export_DirectoryStore_Source.zarr'), + TempStore(dir=os.path.dirname(__file__)), # set dir to avoid switching drives on Windows + NestedDirectoryStore('test_export_NestedDirectoryStore_Source.zarr')] + EXPORT_PATHS = [None, + DirectoryStore('test_export_DirectoryStore_Export.zarr'), + TempStore(dir=os.path.dirname(__file__)), # set dir to avoid switching drives on Windows + NestedDirectoryStore('test_export_NestedDirectoryStore_Export.zarr')] + + def get_manager(self): + return get_hdmfcommon_manager() + + def roundtripExportContainer(self, container, write_path, export_path): + with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io: + write_io.write(container, cache_spec=True) + + with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io: + with ZarrIO(export_path, mode='w') as export_io: + export_io.export(src_io=read_io, write_args={'link_data': False}) + + read_io = ZarrIO(export_path, manager=self.get_manager(), mode='r') self.ios.append(read_io) exportContainer = read_io.read() return exportContainer +############################################ +# HDMF Common test container mixins +########################################### class MixinTestDynamicTableContainer(): """ Mixin class used in conjunction with MixinTestCaseConvert to create conversion tests that test export of DynamicTable container classes. This class only defines the setUpContainer function for the test. The roundtripExportContainer function required for the test needs to be defined separately (e.g., by another mixin or the test class itself) - This mixin adds the class variable, TABLE_TYPE which is an int to select between different + This mixin adds the class variable, ``TABLE_TYPE`` which is an int to select between different container types for testing: - TABLE_TYPE=0 : Table of int, float, bool, Enum - TABLE_TYPE=1 : Table of int, float, str, bool, Enum + + * ``TABLE_TYPE=0`` : Table of int, float, bool, Enum + * ``TABLE_TYPE=1`` : Table of int, float, str, bool, Enum """ TABLE_TYPE = 0 def setUpContainer(self): - # TODO: The tables are names "root" because otherwise the Zarr backend does not determine the path correctly + # TODO: The tables are named "root" because otherwise the Zarr backend does not determine the path correctly if self.TABLE_TYPE == 0: table = DynamicTable(name=ROOT_NAME, description='an example table') @@ -211,39 +342,27 @@ def setUpContainer(self): ######################################### # HDMF Foo test container test harness ######################################### -class MixinTestZarrToHDF5Foo(MixinTestZarrToHDF5): - """ - Convert mixin for Zarr to HDF5 but using the BuildManager for the Foo test containers - """ - def get_manager(self): - return get_foo_buildmanager() - - -class MixinTestHDF5ToZarrFoo(MixinTestHDF5ToZarr): - """ - Convert mixin for HDF5 to Zarr but using the BuildManager for the Foo test containers - """ - def get_manager(self): - return get_foo_buildmanager() - - class MixinTestFoo(): """ Mixin class used in conjunction with MixinTestCaseConvert to create conversion tests that test export of a variety of Foo container classes. This class only defines the setUpContainer - function for the test. The roundtripExportContainer and get_manager function required for + and get_manager functions. 
The roundtripExportContainer function required for the test needs to be defined separately, e.g., by another mixin for Foo test cases, e.g., - MixinTestZarrToHDF5Foo or MixinTestHDF5ToZarrFoo. + MixinTestZarrToHDF5, MixinTestHDF5ToZarr, or MixinTestZarrToZarr This mixin adds the class variable, FOO_TYPE which is an int to select between different container types for testing: - FOO_TYPE=0 : File with two Foo buckets storing integer datasets - FOO_TYPE=1 : File with one Foo buckets storing integer dataset and a SoftLink to it + + * ``FOO_TYPE=0`` : File with two Foo buckets storing integer datasets + * ``FOO_TYPE=1`` : File with one Foo buckets storing integer dataset and a SoftLink to it """ FOO_TYPE = 0 FOO_TYPES = {'int_data': 0, 'link_data': 1, 'str_data': 2} + def get_manager(self): + return get_foo_buildmanager() + def setUpContainer(self): if self.FOO_TYPE == 0: foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) @@ -276,6 +395,9 @@ class TestHDF5ToZarrDynamicTableC0(MixinTestDynamicTableContainer, IGNORE_STRING_TO_BYTE = False TABLE_TYPE = 0 + def test_simple(self, write_path=None, export_path=None): + print(write_path, export_path) + class TestZarrToHDF5DynamicTableC0(MixinTestDynamicTableContainer, MixinTestZarrToHDF5, @@ -291,6 +413,20 @@ class TestZarrToHDF5DynamicTableC0(MixinTestDynamicTableContainer, TABLE_TYPE = 0 +class TestZarrToZarrDynamicTableC0(MixinTestDynamicTableContainer, + MixinTestZarrToZarr, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of DynamicTable containers from Zarr to HDF5. + See MixinTestDynamicTableContainer.setUpContainer for the container spec. + """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = False + TABLE_TYPE = 0 + + class TestHDF5ToZarrDynamicTableC1(MixinTestDynamicTableContainer, MixinTestHDF5ToZarr, MixinTestCaseConvert, @@ -319,6 +455,20 @@ class TestZarrToHDF5DynamicTableC1(MixinTestDynamicTableContainer, TABLE_TYPE = 1 +class TestZarrToZarrDynamicTableC1(MixinTestDynamicTableContainer, + MixinTestZarrToZarr, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of DynamicTable containers from Zarr to HDF5. + See MixinTestDynamicTableContainer.setUpContainer for the container spec. + """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = True # Need to ignore conversion of strings to bytes + TABLE_TYPE = 1 + + class TestHDF5ToZarrCSRMatrix(MixinTestCSRMatrix, MixinTestHDF5ToZarr, MixinTestCaseConvert, @@ -343,8 +493,34 @@ class TestZarrToHDF5CSRMatrix(MixinTestCSRMatrix, IGNORE_STRING_TO_BYTE = False +class TestZarrToZarrCSRMatrix(MixinTestCSRMatrix, + MixinTestZarrToZarr, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of CSRMatrix containers from Zarr to HDF5. + """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = False + + class TestZarrToHDF5FooCase1(MixinTestFoo, - MixinTestZarrToHDF5Foo, + MixinTestZarrToHDF5, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of a simple Foo container with two buckets of datasets from Zarr to HDF5 + See MixinTestFoo.setUpContainer for the container spec used. 
+ """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = True + FOO_TYPE = MixinTestFoo.FOO_TYPES['int_data'] + + +class TestZarrToZarrFooCase1(MixinTestFoo, + MixinTestZarrToZarr, MixinTestCaseConvert, TestCase): """ @@ -358,7 +534,7 @@ class TestZarrToHDF5FooCase1(MixinTestFoo, class TestHDF5toZarrFooCase1(MixinTestFoo, - MixinTestHDF5ToZarrFoo, + MixinTestHDF5ToZarr, MixinTestCaseConvert, TestCase): """ @@ -372,7 +548,21 @@ class TestHDF5toZarrFooCase1(MixinTestFoo, class TestZarrToHDF5FooCase2(MixinTestFoo, - MixinTestZarrToHDF5Foo, + MixinTestZarrToHDF5, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of a simple Foo container with two buckets of datasets from Zarr to HDF5 + See MixinTestFoo.setUpContainer for the container spec used. + """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = True + FOO_TYPE = MixinTestFoo.FOO_TYPES['link_data'] + + +class TestZarrToZarrFooCase2(MixinTestFoo, + MixinTestZarrToZarr, MixinTestCaseConvert, TestCase): """ @@ -386,7 +576,7 @@ class TestZarrToHDF5FooCase2(MixinTestFoo, class TestHDF5toZarrFooCase2(MixinTestFoo, - MixinTestHDF5ToZarrFoo, + MixinTestHDF5ToZarr, MixinTestCaseConvert, TestCase): """ diff --git a/tests/unit/test_zarrio.py b/tests/unit/test_zarrio.py new file mode 100644 index 00000000..e1526282 --- /dev/null +++ b/tests/unit/test_zarrio.py @@ -0,0 +1,124 @@ +""" +Module for testing different Zarr storage backends + +This module uses the tests defined in base_tests_zarrio.py and runs them for +the different storage backends supported by ZarrIO. Specifically, the +BaseTestZarrWriter, BaseTestZarrWriteUnit, and BaseTestExportZarrToZarr classes +are used as base classes and the setUp and tearDown methods are customized +to use different backends. I.e, any tests that are being added to those +classes will then be run here with all different backends so that we don't +need to implement the tests separately for the different backends. +""" +from tests.unit.base_tests_zarrio import (BaseTestZarrWriter, + BaseTestZarrWriteUnit, + BaseTestExportZarrToZarr) +from zarr.storage import (DirectoryStore, + TempStore, + NestedDirectoryStore) + + +###################################################### +# Default storage backend using just a string path +###################################################### +class TestZarrWriterDefaultStore(BaseTestZarrWriter): + """ + Test writing of builder with Zarr using the default store. + + All settings are already defined in the BaseTestZarrWriter class so we here only + need to instantiate the class to run the tests. + """ + pass + + +class TestZarrWriteUnitDefaultStore(BaseTestZarrWriteUnit): + """ + Unit test for individual write functions using the default store. + + All settings are already defined in the BaseTestZarrWriter class so we here only + need to instantiate the class to run the tests. + """ + pass + + +class TestExportZarrToZarrDefaultStore(BaseTestExportZarrToZarr): + """ + Test exporting Zarr to Zarr using the default store. + + All settings are already defined in the BaseTestZarrWriter class so we here only + need to instantiate the class to run the tests. 
+ """ + pass + + +######################################### +# DirectoryStore tests +######################################### +class TestZarrWriterDirectoryStore(BaseTestZarrWriter): + """Test writing of builder with Zarr using a custom DirectoryStore""" + def setUp(self): + super().setUp() + self.store = DirectoryStore(self.store_path) + + +class TestZarrWriteUnitDirectoryStore(BaseTestZarrWriteUnit): + """Unit test for individual write functions using a custom DirectoryStore""" + def setUp(self): + self.store_path = "test_io.zarr" + self.store = DirectoryStore(self.store_path) + + +class TestExportZarrToZarrDirectoryStore(BaseTestExportZarrToZarr): + """Test exporting Zarr to Zarr using DirectoryStore""" + def setUp(self): + super().setUp() + self.store = [DirectoryStore(p) for p in self.store_path] + + +######################################### +# TempStore tests +######################################### +class TestZarrWriterTempStore(BaseTestZarrWriter): + """Test writing of builder with Zarr using a custom TempStore""" + def setUp(self): + super().setUp() + self.store = TempStore() + self.store_path = self.store.path + + +class TestZarrWriteUnitTempStore(BaseTestZarrWriteUnit): + """Unit test for individual write functions using a custom TempStore""" + def setUp(self): + self.store = TempStore() + self.store_path = self.store.path + + +class TestExportZarrToZarrTempStore(BaseTestExportZarrToZarr): + """Test exporting Zarr to Zarr using TempStore.""" + def setUp(self): + super().setUp() + self.store = [TempStore() for i in range(len(self.store_path))] + self.store_path = [s.path for s in self.store] + + +######################################### +# NestedDirectoryStore tests +######################################### +class TestZarrWriterNestedDirectoryStore(BaseTestZarrWriter): + """Test writing of builder with Zarr using a custom NestedDirectoryStore""" + def setUp(self): + super().setUp() + self.store = NestedDirectoryStore(self.store_path) + + +class TestZarrWriteUnitNestedDirectoryStore(BaseTestZarrWriteUnit): + """Unit test for individual write functions using a custom NestedDirectoryStore""" + def setUp(self): + self.store_path = "test_io.zarr" + self.store = NestedDirectoryStore(self.store_path) + + +class TestExportZarrToZarrNestedDirectoryStore(BaseTestExportZarrToZarr): + """Test exporting Zarr to Zarr using NestedDirectoryStore""" + def setUp(self): + super().setUp() + self.store = [NestedDirectoryStore(p) for p in self.store_path] From d04d8d3fa015224839faa2c6d20e189710594d62 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 18 Jan 2023 01:00:48 -0800 Subject: [PATCH 04/27] Fix broken external link to GitHub file in docs (#70) --- docs/source/integrating_data_stores.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/integrating_data_stores.rst b/docs/source/integrating_data_stores.rst index 86c886bc..d7573e73 100644 --- a/docs/source/integrating_data_stores.rst +++ b/docs/source/integrating_data_stores.rst @@ -62,7 +62,7 @@ test_zarrio.py `base_tests_zarrio.py `_ provides a collection of base classes that define common test cases to test basic functionality of :py:class:`~hdmf_zarr.backend.ZarrIO`. Using these base classes, the -`test_zarrio.py `_ module +`test_zarrio.py `_ module then implements concrete tests for various backends. 
To create tests for a new data store, we need to add the following main classes (while ```` in the code below would need to be replaced with the class name of the new data store): From e4e9543e53efaefc31b74c4fd8557353cb0b9121 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 18 Jan 2023 03:07:17 -0800 Subject: [PATCH 05/27] Remove ZarrIO development status warning (#71) --- src/hdmf_zarr/backend.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 659ea68e..7ef8b36c 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -114,9 +114,6 @@ def __init__(self, **kwargs): if isinstance(self.__path, SUPPORTED_ZARR_STORES): source_path = self.__path.path super().__init__(manager, source=source_path) - warn_msg = ("The ZarrIO backend is experimental. It is under active development. " - "The ZarrIO backend may change any time and backward compatibility is not guaranteed.") - warnings.warn(warn_msg) @property def file(self): From 7a31762c669180e91c5c7077d504ab7b6a96b72b Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 9 May 2023 10:34:30 -0700 Subject: [PATCH 06/27] Remove codecov as dependency (#74) --- requirements-dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index fec71b98..cb72d345 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,5 @@ # pinned dependencies to reproduce an entire development environment to use HDMF, run HDMF tests, check code style, # compute coverage, and create test environments -codecov==2.1.12 coverage==6.4.2 flake8==5.0.4 flake8-debugger==4.1.2 From 87e2d75380442d912fcf4b6c210502fe3e139906 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 9 May 2023 11:02:07 -0700 Subject: [PATCH 07/27] Update run_all_tests.yml (#77) Fix #76 --- .github/workflows/run_all_tests.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 2eb6a9d7..3e2dc3c0 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -152,23 +152,30 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true - activate-environment: true python-version: ${{ matrix.python-ver }} - name: Install build dependencies run: | conda config --set always_yes yes --set changeps1 no conda info - conda install -c conda-forge tox - conda list + # the conda dependency resolution for tox under python 3.7 can install the wrong importlib_metadata + conda install -c conda-forge tox "importlib_metadata>4" + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls + - name: Run tox tests run: | tox -e ${{ matrix.test-tox-env }} + - name: Build wheel and source distribution run: | tox -e ${{ matrix.build-tox-env }} ls -1 dist + - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl From 708e1a7ecbe65f2c6267ef1b2bf4bcc42057254b Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 9 May 2023 14:34:07 -0700 Subject: [PATCH 08/27] Gallery Bug (#78) * first * quick revert * revert * Update setup.py * Update requirements-min.txt * Update setup.py * Update setup.py * Update requirements-min.txt * Update requirements.txt * Update setup.py * Update setup.py --- requirements-min.txt | 3 ++- requirements.txt | 4 ++-- setup.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff 
--git a/requirements-min.txt b/requirements-min.txt index bf4d276f..003d102b 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,5 +1,6 @@ -hdmf==3.5.0 +hdmf==3.5.2 zarr==2.11.0 numcodecs==0.9.1 pynwb==2.0.0 setuptools +importlib_resources;python_version<'3.9' # Remove when python 3.9 becomes the new minimum diff --git a/requirements.txt b/requirements.txt index 69d3947b..e2825e8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # pinned dependencies to reproduce an entire development environment to use HDMF-ZARR -hdmf==3.5.0 +hdmf==3.5.2 zarr==2.11.0 numcodecs==0.9.1 -pynwb==2.0.1 \ No newline at end of file +pynwb==2.0.1 diff --git a/setup.py b/setup.py index 50953471..17add269 100755 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ reqs = [ - 'hdmf>=3.5.0', + 'hdmf<=3.5.4, >=3.5.2', 'zarr>=2.11.0', 'numcodecs>=0.9.1', 'pynwb>=2.0.0', From 16078627dea83db05e4d03fbabd403adcb7216ab Mon Sep 17 00:00:00 2001 From: Alessio Buccino Date: Wed, 10 May 2023 00:01:27 +0200 Subject: [PATCH 09/27] Fix reading dtype from zarr dataset (#72) * Fix reading dtype from zarr dataset --------- Co-authored-by: Oliver Ruebel --- src/hdmf_zarr/backend.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 7ef8b36c..7c364864 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -670,6 +670,7 @@ def __setup_chunked_dataset__(cls, parent, name, data, options=None): io_settings['dtype'] = cls.__dtypes.get(io_settings['dtype']) try: dset = parent.create_dataset(name, **io_settings) + dset.attrs['zarr_dtype'] = np.dtype(io_settings['dtype']).str except Exception as exc: raise Exception("Could not create dataset %s in %s" % (name, parent.name)) from exc return dset @@ -1099,11 +1100,18 @@ def __read_dataset(self, zarr_obj, name): if ret is not None: return ret - if 'zarr_dtype' not in zarr_obj.attrs: + if 'zarr_dtype' in zarr_obj.attrs: + zarr_dtype = zarr_obj.attrs['zarr_dtype'] + elif hasattr(zarr_obj, 'dtype'): # Fallback for invalid files that are mssing zarr_type + zarr_dtype = zarr_obj.dtype + warnings.warn( + "Inferred dtype from zarr type. Dataset missing zarr_dtype: " + str(name) + " " + str(zarr_obj) + ) + else: raise ValueError("Dataset missing zarr_dtype: " + str(name) + " " + str(zarr_obj)) kwargs = {"attributes": self.__read_attrs(zarr_obj), - "dtype": zarr_obj.attrs['zarr_dtype'], + "dtype": zarr_dtype, "maxshape": zarr_obj.shape, "chunks": not (zarr_obj.shape == zarr_obj.chunks), "source": self.source} From 94232a8fe4b76a629313bc739c48fa79f6727d4a Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 9 May 2023 15:57:33 -0700 Subject: [PATCH 10/27] Delete test.py (#84) --- test.py | 163 -------------------------------------------------------- 1 file changed, 163 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index a5a55603..00000000 --- a/test.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python - -# NOTE this script is currently used in CI *only* to test the sphinx gallery examples using python test.py -e -import warnings -import re -import argparse -import logging -import os.path -import os -import sys -import traceback -import unittest - -flags = {'hdmf_zarr': 1, 'example': 4} - -TOTAL = 0 -FAILURES = 0 -ERRORS = 0 - - -class SuccessRecordingResult(unittest.TextTestResult): - '''A unittest test result class that stores successful test cases as well - as failures and skips. 
- ''' - - def addSuccess(self, test): - if not hasattr(self, 'successes'): - self.successes = [test] - else: - self.successes.append(test) - - def get_all_cases_run(self): - '''Return a list of each test case which failed or succeeded - ''' - cases = [] - - if hasattr(self, 'successes'): - cases.extend(self.successes) - cases.extend([failure[0] for failure in self.failures]) - - return cases - - -def run_test_suite(directory, description="", verbose=True): - global TOTAL, FAILURES, ERRORS - logging.info("running %s" % description) - directory = os.path.join(os.path.dirname(__file__), directory) - runner = unittest.TextTestRunner(verbosity=verbose, resultclass=SuccessRecordingResult) - test_result = runner.run(unittest.TestLoader().discover(directory)) - - TOTAL += test_result.testsRun - FAILURES += len(test_result.failures) - ERRORS += len(test_result.errors) - - return test_result - - -def _import_from_file(script): - import imp - return imp.load_source(os.path.basename(script), script) - - -warning_re = re.compile("Parent module '[a-zA-Z0-9]+' not found while handling absolute import") - - -def run_example_tests(): - global TOTAL, FAILURES, ERRORS - logging.info('running example tests') - - # get list of example scripts - examples_scripts = list() - for root, dirs, files in os.walk(os.path.join(os.path.dirname(__file__), "docs", "gallery")): - for f in files: - if f.endswith(".py"): - examples_scripts.append(os.path.join(root, f)) - - TOTAL += len(examples_scripts) - curr_dir = os.getcwd() - for script in examples_scripts: - os.chdir(curr_dir) # Reset the working directory - script_abs = os.path.abspath(script) # Determine the full path of the script - # Set the working dir to be relative to the script to allow the use of relative file paths in the scripts - os.chdir(os.path.dirname(script_abs)) - try: - logging.info("Executing %s" % script) - ws = list() - with warnings.catch_warnings(record=True) as tmp: - # Import/run the example gallery - _import_from_file(script_abs) - for w in tmp: # ignore RunTimeWarnings about importing - if isinstance(w.message, RuntimeWarning) and not warning_re.match(str(w.message)): - ws.append(w) - for w in ws: - warnings.showwarning(w.message, w.category, w.filename, w.lineno, w.line) - except Exception: - print(traceback.format_exc()) - FAILURES += 1 - ERRORS += 1 - # Make sure to reset the working directory at the end - os.chdir(curr_dir) - - -def main(): - warnings.warn( - "python test.py is deprecated. 
Please use pytest to run unit tests and run python test_gallery.py to " - "test Sphinx Gallery files.", - DeprecationWarning - ) - - # setup and parse arguments - parser = argparse.ArgumentParser('python test.py [options]') - parser.set_defaults(verbosity=1, suites=[]) - parser.add_argument('-v', '--verbose', const=2, dest='verbosity', action='store_const', help='run in verbose mode') - parser.add_argument('-q', '--quiet', const=0, dest='verbosity', action='store_const', help='run disabling output') - parser.add_argument('-u', '--unit', action='append_const', const=flags['hdmf_zarr'], dest='suites', - help='run unit tests for hdmf_zarr package') - parser.add_argument('-e', '--example', action='append_const', const=flags['example'], dest='suites', - help='run example tests') - args = parser.parse_args() - if not args.suites: - args.suites = list(flags.values()) - args.suites.pop(args.suites.index(flags['example'])) # remove example as a suite run by default - - # set up logger - root = logging.getLogger() - root.setLevel(logging.INFO) - ch = logging.StreamHandler(sys.stdout) - ch.setLevel(logging.INFO) - formatter = logging.Formatter('======================================================================\n' - '%(asctime)s - %(levelname)s - %(message)s') - ch.setFormatter(formatter) - root.addHandler(ch) - - warnings.simplefilter('always') - - # Run unit tests for hdmf_zarr package - if flags['hdmf_zarr'] in args.suites: - run_test_suite("tests/unit", "hdmf_zarr unit tests", verbose=args.verbosity) - - # Run example tests - if flags['example'] in args.suites: - run_example_tests() - - final_message = 'Ran %s tests' % TOTAL - exitcode = 0 - if ERRORS > 0 or FAILURES > 0: - exitcode = 1 - _list = list() - if ERRORS > 0: - _list.append('errors=%d' % ERRORS) - if FAILURES > 0: - _list.append('failures=%d' % FAILURES) - final_message = '%s - FAILED (%s)' % (final_message, ','.join(_list)) - else: - final_message = '%s - OK' % final_message - - logging.info(final_message) - - return exitcode - - -if __name__ == "__main__": - sys.exit(main()) From e6905dcce2e61ebbff3d548dc530a7f5f1175e2d Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 11 May 2023 23:28:27 -0700 Subject: [PATCH 11/27] Fix sphinx extlinks (#86) --- docs/source/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index de6ff8cc..9620f954 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -88,9 +88,9 @@ # Use this for mapping to external links extlinks = { - 'pynwb-docs': ('https://pynwb.readthedocs.io/en/stable/', '%s'), - 'hdmf-docs': ('https://hdmf.readthedocs.io/en/stable/', '%s'), - 'zarr-docs': ('https://zarr.readthedocs.io/en/stable/', '%s') + 'pynwb-docs': ('https://pynwb.readthedocs.io/en/stable/%s', '%s'), + 'hdmf-docs': ('https://hdmf.readthedocs.io/en/stable/%s', '%s'), + 'zarr-docs': ('https://zarr.readthedocs.io/en/stable/%s', '%s') } # Add any paths that contain templates here, relative to this directory. 
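Note on the extlinks change above: in Sphinx's ``sphinx.ext.extlinks`` extension, each entry maps a role name to a URL template and a caption, and (from Sphinx 4.0 onward) both strings are expected to contain a ``%s`` placeholder into which the role's target is substituted — which is why the trailing ``%s`` was added to each base URL. A minimal, hypothetical usage sketch (the role name and target page below are illustrative only, not taken from the project's docs):

.. code:: python

    # conf.py (sketch) -- not the project's actual configuration
    extensions = ["sphinx.ext.extlinks"]

    extlinks = {
        # role name: (URL template containing %s, link caption containing %s)
        "hdmf-docs": ("https://hdmf.readthedocs.io/en/stable/%s", "%s"),
    }

    # In an .rst file, the role substitutes its target into the URL template:
    #   :hdmf-docs:`overview.html`
    # renders as a link to https://hdmf.readthedocs.io/en/stable/overview.html
    # with "overview.html" as the link text.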
From 9da715648d7fe45afd9650774747e23b2586d794 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 15 May 2023 18:09:13 -0700 Subject: [PATCH 12/27] Add templates, code of conduct, and contribution.rst (#88) * templates * Update release.md * Update CONTRIBUTING.rst --- .github/CODE_OF_CONDUCT.md | 46 +++++++ .github/ISSUE_TEMPLATE/bug_report.yml | 94 +++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 5 + .github/ISSUE_TEMPLATE/documentation.yml | 46 +++++++ .github/ISSUE_TEMPLATE/feature_request.yml | 64 ++++++++++ .github/PULL_REQUEST_TEMPLATE/release.md | 24 ++++ .github/pull_request_template.md | 17 +++ docs/CONTRIBUTING.rst | 132 +++++++++++++++++++++ 8 files changed, 428 insertions(+) create mode 100644 .github/CODE_OF_CONDUCT.md create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/documentation.yml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE/release.md create mode 100644 .github/pull_request_template.md create mode 100644 docs/CONTRIBUTING.rst diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..e6ec6ccb --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,46 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others’ private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000..a1db846c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,94 @@ +name: Bug Report +description: File a bug report. +title: "[Bug]: " +labels: ["bug"] + +body: + - type: markdown + attributes: + value: | + # Thanks for taking the time to fill out this bug report! + ### The following information will help us resolve your issue. + - type: textarea + id: what-happened + attributes: + label: What happened? + description: Also tell us, what did you expect to happen? + validations: + required: true + - type: textarea + id: reproduce + attributes: + label: Steps to Reproduce + description: | + Please copy and paste the code you were trying to run that caused the error. + + Feel free to include as little or as much as you think is relevant. This section will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true + - type: textarea + id: traceback + attributes: + label: Traceback + description: | + Please copy and paste the full traceback produced by the error. + + This section will be automatically formatted into code, so no need for backticks. + render: shell + validations: + required: true + - type: dropdown + id: os + attributes: + label: Operating System + options: + - Windows + - macOS + - Linux + validations: + required: true + - type: dropdown + id: executable + attributes: + label: Python Executable + options: + - Conda + - Python + validations: + required: true + - type: dropdown + id: python_version + attributes: + label: Python Version + options: + - "3.7" + - "3.8" + - "3.9" + - "3.10" + - "3.11" + validations: + required: true + - type: textarea + id: package_versions + attributes: + label: Package Versions + description: | + Please share your currently installed Python package versions by calling `pip freeze > environment_for_issue.txt` and uploading the text file along with this issue. + + This helps us determine if there are any secondary or tertiary issues caused by other dependencies. + + You can attach images or log files by clicking this area to highlight it and then dragging files in. + If GitHub upload is not working, you can also copy and paste the output into this section. 
+ - type: checkboxes + id: terms + attributes: + label: Code of Conduct + description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/.github/CODE_OF_CONDUCT.md) + options: + - label: I agree to follow this project's [Code of Conduct](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/.github/CODE_OF_CONDUCT.md) + required: true + - label: Have you checked the [Contributing](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/docs/CONTRIBUTING.rst) document? + required: true + - label: Have you ensured this bug was not already [reported](https://github.com/hdmf-dev/hdmf-zarr/issues)? + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..9a5863af --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Read the Documentation + url: https://hdmf-zarr.readthedocs.io/en/latest/ + about: Check out the full documentation. diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml new file mode 100644 index 00000000..1558a13e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.yml @@ -0,0 +1,46 @@ +name: Documentation +description: Is the documentation of something missing, unclear, or lacking? This is the place. +title: "[Documentation]: " +labels: "documentation" +body: + - type: markdown + attributes: + value: | + ## Thank you for your suggestion! + + We welcome any ideas about how to make **HDMF-ZARR** better for the community. + + Please keep in mind that new or improved documentation may not get implemented immediately. + - type: textarea + id: summary + attributes: + label: What would you like changed or added to the documentation and why? + description: Do you have any suggestions for the documents? + validations: + required: true + - type: dropdown + id: interest + attributes: + label: Do you have any interest in helping write or edit the documentation? + description: | + We appreciate any help you can offer! + + For information on how to contribute, please refer to our [contributing guidelines](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/docs/CONTRIBUTING.rst). + options: + - Yes. + - Yes, but I would need guidance. + - No. + validations: + required: true + - type: checkboxes + id: terms + attributes: + label: Code of Conduct + description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/.github/CODE_OF_CONDUCT.md) + options: + - label: I agree to follow this project's [Code of Conduct](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/.github/CODE_OF_CONDUCT.md) + required: true + - label: Have you checked the [Contributing](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/docs/CONTRIBUTING.rst) document? + required: true + - label: Have you ensured this change was not already [requested](https://github.com/hdmf-dev/hdmf-zarr/issues)? + required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000..85462b52 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,64 @@ +name: Feature Request +description: Suggest an idea for a brand new feature, or a change to an existing one. +title: "[Feature]: " +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + ## Thank you for your suggestion! 
+ + We welcome any ideas about how to make **HDMF-ZARR** better for the community. + + Please keep in mind that new features may not get implemented immediately. + - type: textarea + id: summary + attributes: + label: What would you like to see added to HDMF-ZARR? + description: | + What are you trying to achieve with **HDMF-ZARR**? + + Is this a more convenient way to do something that is already possible, or is a workaround currently unfeasible? + validations: + required: true + - type: textarea + id: problem + attributes: + label: Is your feature request related to a problem? + description: A clear and concise description of what the problem is. + - type: textarea + id: solution + attributes: + label: What solution would you like? + description: | + A clear and concise description of what you want to happen. + + Describe alternative solutions you have considered. + validations: + required: true + - type: dropdown + id: interest + attributes: + label: Do you have any interest in helping implement the feature? + description: | + We appreciate any help you can offer! + + For information on how to contribute, please refer to our [contributing guidelines](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/docs/CONTRIBUTING.rst). + options: + - Yes. + - Yes, but I would need guidance. + - No. + validations: + required: true + - type: checkboxes + id: terms + attributes: + label: Code of Conduct + description: By submitting this issue, you agree to follow our [Code of Conduct](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/.github/CODE_OF_CONDUCT.md) + options: + - label: I agree to follow this project's [Code of Conduct](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/.github/CODE_OF_CONDUCT.md) + required: true + - label: Have you checked the [Contributing](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/docs/CONTRIBUTING.rst) document? + required: true + - label: Have you ensured this change was not already [requested](https://github.com/hdmf-dev/hdmf-zarr/issues)? + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md new file mode 100644 index 00000000..13893a09 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -0,0 +1,24 @@ +Prepare for release of HDMF [version] + +### Before merging: +- [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, + `requirements-doc.txt`, `requirements-min.txt`, and `setup.py` as needed +- [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, + and any other locations as needed +- [ ] Update `setup.py` as needed +- [ ] Update `README.rst` as needed +- [ ] Update changelog (set release date) in `CHANGELOG.md` and any other docs as needed +- [ ] Run tests locally including gallery tests, and inspect all warnings and outputs + (`pytest && python test_gallery.py`) +- [ ] Test docs locally by going into the `docs` directory and running the following: `make clean && make html` +- [ ] Push changes to this PR and make sure all PRs to be included in this release have been merged +- [ ] Check that the readthedocs build for this PR succeeds (build latest to pull the new branch, then activate and + build docs for new branch): https://readthedocs.org/projects/hdmf-zarr/builds/ + +### After merging: +1. Create release by following steps in `docs/source/make_a_release.rst` or use alias `git pypi-release [tag]` if set up +2. 
After the CI bot creates the new release (wait ~10 min), update the release notes on the + [GitHub releases page](https://github.com/hdmf-dev/hdmf-zarr/releases) with the changelog +3. Check that the readthedocs "latest" and "stable" builds run and succeed +4. Update [conda-forge/hdmf_zarr-feedstock](https://github.com/conda-forge/hdmf_zarr-feedstock) with the latest version number + and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render as needed diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..8435ea82 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,17 @@ +## Motivation + +What was the reasoning behind this change? Please explain the changes briefly. + +## How to test the behavior? +``` +Show how to reproduce the new behavior (can be a bug fix or a new feature) +``` + +## Checklist + +- [ ] Did you update CHANGELOG.md with your changes? +- [ ] Have you checked our [Contributing](https://github.com/hdmf-dev/hdmf-zarr/blob/dev/docs/CONTRIBUTING.rst) document? +- [ ] Have you ensured the PR clearly describes the problem and the solution? +- [ ] Is your contribution compliant with our coding style? This can be checked running `ruff` from the source directory. +- [ ] Have you checked to ensure that there aren't other open [Pull Requests](https://github.com/hdmf-dev/hdmf-zarr/pulls) for the same change? +- [ ] Have you included the relevant issue number using "Fix #XXX" notation where XXX is the issue number? By including "Fix #XXX" you allow GitHub to close issue #XXX when the PR is merged. diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst new file mode 100644 index 00000000..4daeaf54 --- /dev/null +++ b/docs/CONTRIBUTING.rst @@ -0,0 +1,132 @@ +Contributing Guide +================== + +.. _sec-code-of-conduct: + +Code of Conduct +--------------- + +This project and everyone participating in it is governed by our `code of conduct guidelines `_. By participating, you are expected to uphold this code. Please report unacceptable behavior. + +.. _sec-contribution-types: + +Types of Contributions +---------------------- + +Did you find a bug? or Do you intend to add a new feature or change an existing one? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* **Submit issues and requests** using our `issue tracker `_ + +* **Ensure the feature or change was not already reported** by searching on GitHub under `HDMF-ZARR Issues `_ + +* If you are unable to find an open issue addressing the problem then open a new issue on the respective repository. Be sure to use our issue templates and include: + + * **brief and descriptive title** + * **clear description of the problem you are trying to solve**. Describing the use case is often more important than proposing a specific solution. By describing the use case and problem you are trying to solve gives the development team community a better understanding for the reasons of changes and enables others to suggest solutions. + * **context** providing as much relevant information as possible and if available a **code sample** or an **executable test case** demonstrating the expected behavior and/or problem. + +* Be sure to select the appropriate label (bug report or feature request) for your tickets so that they can be processed accordingly. 
+ +* HDMF-ZARR is currently being developed primarily by staff at scientific research institutions and industry, most of which work on many different research projects. Please be patient, if our development team is not able to respond immediately to your issues. In particular issues that belong to later project milestones may not be reviewed or processed until work on that milestone begins. + +Did you write a patch that fixes a bug or implements a new feature? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +See the :ref:`sec-contributing` section below for details. + +Did you fix whitespace, format code, or make a purely cosmetic patch in source code? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Source code changes that are purely cosmetic in nature and do not add anything substantial to the stability, functionality, or testability will generally not be accepted unless they have been approved beforehand. One of the main reasons is that there are a lot of hidden costs in addition to writing the code itself, and with the limited resources of the project, we need to optimize developer time. E.g,. someone needs to test and review PRs, backporting of bug fixes gets harder, it creates noise and pollutes the git repo and many other cost factors. + +Do you have questions about HDMF-ZARR? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +See our `hdmf-dev.github.io `_ website for details. + +Informal discussions between developers and users? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The https://nwb-users.slack.com slack is currently used for informal discussions between developers and users. + +.. _sec-contributing: + +Contributing Patches and Changes +-------------------------------- + +To contribute to HDMF-ZARR you must submit your changes to the ``dev`` branch via a `Pull Request `_. + +From your local copy directory, use the following commands. + +1) First create a new branch to work on + +.. code-block:: bash + + $ git checkout -b + +2) Make your changes. + +3) Push your feature branch to origin (i.e. GitHub) + +.. code-block:: bash + + $ git push origin + +4) Once you have tested and finalized your changes, create a pull request targeting ``dev`` as the base branch. Be sure to use our `pull request template `_ and: + + * Ensure the PR description clearly describes the problem and solution. + * Include the relevant issue number if applicable. + * Before submitting, please ensure that: + * The proposed changes include an addition to ``CHANGELOG.md`` describing your changes. To label the change with the PR number, you will have to first create the PR, then edit the ``CHANGELOG.md`` with the PR number, and push that change. + * The code follows our coding style. This can be checked running ``ruff`` from the source directory. + * **NOTE:** Contributed branches will be removed by the development team after the merge is complete and should, hence, not be used after the pull request is complete. + +.. _sec-styleguides: + +Style Guides +------------ + +Python Code Styleguide +^^^^^^^^^^^^^^^^^^^^^^ + +Before you create a Pull Request, make sure you are following the HDMF-ZARR style guide (PEP8_). +To check whether your code conforms to the HDMF-ZARR style guide, simply run the flake8_ tool in the project's root +directory. + +.. _flake8: https://flake8.pycqa.org/en/latest/ +.. _PEP8: https://peps.python.org/pep-0008/ + +.. 
code:: + + $ flake8 + +Git Commit Message Styleguide +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Use the present tense ("Add feature" not "Added feature") +* The first should be short and descriptive. +* Additional details may be included in further paragraphs. +* If a commit fixes an issue, then include "Fix #X" where X is the number of the issue. +* Reference relevant issues and pull requests liberally after the first line. + +Documentation Styleguide +^^^^^^^^^^^^^^^^^^^^^^^^ + +All documentations is written in reStructuredText (RST) using Sphinx. + +Endorsement +----------- + +Please do not take working with an organization (e.g., during a hackathon or via GitHub) as an endorsement of your work or your organization. It's okay to say e.g., “We worked with XXXXX to advance science” but not e.g., “XXXXX supports our work on HDMF-ZARR”.” + +License and Copyright +--------------------- + +See the `license `_ files for details about the copyright and license. + +As indicated in the HDMF-ZARR license: *“You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form.”* + +Contributors to the HDMF-ZARR code base are expected to use a permissive, non-copyleft open source license. Typically 3-clause BSD is used, but any compatible license is allowed, the MIT and Apache 2.0 licenses being good alternative choices. The GPL and other copyleft licenses are not allowed due to the consternation it generates across many organizations. + +Also, make sure that you are permitted to contribute code. Some organizations, even academic organizations, have agreements in place that discuss IP ownership in detail (i.e., address IP rights and ownership that you create while under the employ of the organization). These are typically signed documents that you looked at on your first day of work and then promptly forgot. We don't want contributed code to be yanked later due to IP issues. 
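The contributing guide added above asks contributors to run the style check locally before opening a pull request. A minimal sketch of one way to script that check is shown below; this helper is hypothetical and not part of the hdmf-zarr repository, and the paths passed to it are assumptions.

.. code:: python

    # check_style.py -- hypothetical local helper, not part of hdmf-zarr
    import subprocess
    import sys


    def run_style_check(paths=("src", "tests")):
        """Run flake8 on the given paths and return its exit code."""
        # "python -m flake8" runs the flake8 installed in the current environment.
        result = subprocess.run([sys.executable, "-m", "flake8", *paths])
        return result.returncode


    if __name__ == "__main__":
        sys.exit(run_style_check())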
From 4207ca610e1df2a061a762c593288586ead59e03 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Mon, 15 May 2023 19:19:37 -0700 Subject: [PATCH 13/27] Add Support for Python 3.11 (#87) * Update check_external_links.yml * Update deploy_release.yml * Update run_all_tests.yml * Update run_coverage.yml * Update run_flake8.yml * Update run_tests.yml * Update conf.py * Update setup.py * Update tox.ini * Update tox.ini * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update tox.ini * Update tox.ini Co-authored-by: Ryan Ly * typo * order * typo * pin * pin * pin --------- Co-authored-by: Ryan Ly --- .github/workflows/check_external_links.yml | 2 +- .github/workflows/deploy_release.yml | 2 +- .github/workflows/run_all_tests.yml | 42 ++++++++------- .github/workflows/run_coverage.yml | 4 +- .github/workflows/run_flake8.yml | 2 +- .github/workflows/run_tests.yml | 16 +++--- docs/source/conf.py | 4 +- requirements.txt | 5 +- setup.py | 3 +- tox.ini | 60 ++++++++++++---------- 10 files changed, 75 insertions(+), 65 deletions(-) diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index c084c229..1fbf0ee0 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -22,7 +22,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install Sphinx dependencies and package run: | diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 1287944f..23337005 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install build dependencies run: | diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 3e2dc3c0..0265360d 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -26,23 +26,26 @@ jobs: - { name: linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-prerelease , test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - { name: windows-python3.7-minimum , test-tox-env: 
py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - { name: windows-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: windows-latest } - { name: windows-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-prerelease, test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: windows-latest } + - { name: windows-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: windows-latest } - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - { name: macos-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: macos-latest } - { name: macos-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: macos-latest } - { name: macos-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-prerelease , test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -86,14 +89,14 @@ jobs: matrix: include: - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-gallery-python3.10-prerelease , test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } + - { name: linux-gallery-python3.11-upgraded 
, test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: windows-latest } - - { name: windows-gallery-python3.10-prerelease, test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: windows-latest } - - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: macos-latest } - - { name: macos-gallery-python3.10-prerelease , test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: macos-latest } + - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-gallery-python3.11-prerelease, test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } + - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-37-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -133,9 +136,10 @@ jobs: - { name: conda-linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } - { name: conda-linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: conda-linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-prerelease, test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } + - { name: conda-linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index ae075ae1..142b0868 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -28,7 +28,7 @@ jobs: - { os: macos-latest , opt_req: false } env: OS: ${{ matrix.os }} - PYTHON: '3.10' + PYTHON: 
'3.11' steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -43,7 +43,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: ${{ env.PYTHON }} - name: Install dependencies run: | diff --git a/.github/workflows/run_flake8.yml b/.github/workflows/run_flake8.yml index bc31c994..042b9379 100644 --- a/.github/workflows/run_flake8.yml +++ b/.github/workflows/run_flake8.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install flake8 run: | diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 84add80c..857b4159 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -20,13 +20,13 @@ jobs: matrix: include: - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - - { name: linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest , upload-wheels: true } + - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest , upload-wheels: true } - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -77,9 +77,9 @@ jobs: matrix: include: - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded, python-ver: "3.10", os: ubuntu-latest } + - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: ubuntu-latest } - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-gallery-python3.10-upgraded, test-tox-env: gallery-py310-upgraded, python-ver: "3.10", os: windows-latest } + - { name: windows-gallery-python3.11-upgraded, test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: windows-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -115,7 +115,7 @@ jobs: matrix: 
include: - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: conda-linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } + - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -172,7 +172,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Download wheel and source distributions from artifact uses: actions/download-artifact@v3 diff --git a/docs/source/conf.py b/docs/source/conf.py index 9620f954..d612aade 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -43,7 +43,7 @@ project = 'hdmf_zarr' copyright = '2022, Oliver Ruebel' -author = 'Oliver Ruebel' +author = 'Oliver Ruebel, Matthew Avaylon' # The short X.Y version. version = '{}'.format(get_versions()['version']) @@ -75,7 +75,7 @@ } intersphinx_mapping = { - 'python': ('https://docs.python.org/3.10', None), + 'python': ('https://docs.python.org/3.11', None), 'numpy': ('https://numpy.org/doc/stable/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), diff --git a/requirements.txt b/requirements.txt index e2825e8d..fe306dc3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # pinned dependencies to reproduce an entire development environment to use HDMF-ZARR hdmf==3.5.2 zarr==2.11.0 -numcodecs==0.9.1 -pynwb==2.0.1 +pynwb==2.3.0 +numpy==1.23.5 +numcodecs==0.11.0 diff --git a/setup.py b/setup.py index 17add269..1b6582d5 100755 --- a/setup.py +++ b/setup.py @@ -19,10 +19,10 @@ reqs = [ 'hdmf<=3.5.4, >=3.5.2', 'zarr>=2.11.0', + 'numpy>=1.22, <1.24; python_version>"3.7"', 'numcodecs>=0.9.1', 'pynwb>=2.0.0', 'setuptools', - 'numpy>=1.22, <1.24; python_version>"3.7"' ] print(reqs) @@ -49,6 +49,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "License :: OSI Approved :: BSD License", "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", diff --git a/tox.ini b/tox.ini index f8e7798e..69cf27bf 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py37, py38, py39, py310 +envlist = py37, py38, py39, py310, py311 requires = pip >= 22.0 [testenv] @@ -26,32 +26,32 @@ commands = # Env to create coverage report locally [testenv:localcoverage] -basepython = python3.10 +basepython = python3.11 commands = pytest --cov=hdmf_zarr coverage html -d tests/coverage/htmlcov -# Test with python 3.10; pinned dev and optional reqs -[testenv:py310-optional] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs +[testenv:py311-optional] +basepython = python3.11 install_command = python -m pip install {opts} {packages} deps = -rrequirements-dev.txt commands = {[testenv]commands} -# Test with python 3.10; pinned dev and optional reqs; upgraded run reqs -[testenv:py310-upgraded] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs; upgraded run reqs +[testenv:py311-upgraded] +basepython = python3.11 install_command = python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt commands = {[testenv]commands} -# Test with python 3.10; pinned dev and optional reqs; upgraded, pre-release run reqs -[testenv:py310-prerelease] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs; upgraded, pre-release run reqs +[testenv:py311-prerelease] +basepython = python3.11 install_command = python -m pip install -U --pre {opts} {packages} deps = @@ -88,22 +88,26 @@ commands = {[testenv:build]commands} basepython = python3.10 commands = {[testenv:build]commands} -[testenv:build-py310-optional] -basepython = python3.10 +[testenv:build-py311] +basepython = python3.11 +commands = {[testenv:build]commands} + +[testenv:build-py311-optional] +basepython = python3.11 deps = -rrequirements-dev.txt commands = {[testenv:build]commands} -[testenv:build-py310-upgraded] -basepython = python3.10 +[testenv:build-py311-upgraded] +basepython = python3.11 install_command = python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt commands = {[testenv:build]commands} -[testenv:build-py310-prerelease] -basepython = python3.10 +[testenv:build-py311-prerelease] +basepython = python3.11 install_command = python -m pip install -U --pre {opts} {packages} deps = @@ -135,11 +139,6 @@ deps = commands = python test_gallery.py -[testenv:gallery-py37] -basepython = python3.7 -deps = {[testenv:gallery]deps} -commands = {[testenv:gallery]commands} - [testenv:gallery-py38] basepython = python3.8 deps = {[testenv:gallery]deps} @@ -155,9 +154,14 @@ basepython = python3.10 deps = {[testenv:gallery]deps} commands = {[testenv:gallery]commands} -# Test with python 3.10; pinned dev, doc, and optional reqs; upgraded run reqs -[testenv:gallery-py310-upgraded] -basepython = python3.10 +[testenv:gallery-py311] +basepython = python3.11 +deps = {[testenv:gallery]deps} +commands = {[testenv:gallery]commands} + +# Test with python 3.11; pinned dev, doc, and optional reqs; upgraded run reqs +[testenv:gallery-py311-upgraded] +basepython = python3.11 install_command = python -m pip install -U {opts} {packages} deps = @@ -165,9 +169,9 @@ deps = -rrequirements-doc.txt commands = {[testenv:gallery]commands} -# Test with python 3.10; pinned dev, doc, and optional reqs; pre-release run reqs -[testenv:gallery-py310-prerelease] -basepython = python3.10 +# Test with python 3.11; pinned dev, doc, and optional reqs; pre-release run reqs +[testenv:gallery-py311-prerelease] +basepython = python3.11 install_command = python -m pip install -U --pre {opts} {packages} deps = From 
3c2bb123f29f88283b102368f94356f3df06c56c Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sun, 21 May 2023 01:16:32 -0700 Subject: [PATCH 14/27] Fix broken nightly macos gallery min test (#93) * Fix broken nightly macos gallery min test * Update CHANGELOG.md --- .github/workflows/run_all_tests.yml | 2 +- CHANGELOG.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 0265360d..e58226f3 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -94,7 +94,7 @@ jobs: - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: windows-gallery-python3.11-prerelease, test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } - - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-37-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: macos-latest } - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 28c84972..acb3b0fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ ### API Changes * Removed unused ``filepath`` argument from ``ZarrIO.get_builder_exists_on_disk`` [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) +### Bug fixes +* Fixed error in nightly CI. 
@rly [#93](https://github.com/hdmf-dev/hdmf-zarr/pull/93) + ## 0.2.0 (January 6, 2023) ### Bugs From f8f4adad1dd92e3ce3d05156453db2c01e96a251 Mon Sep 17 00:00:00 2001 From: bendichter Date: Fri, 16 Jun 2023 20:17:54 -0400 Subject: [PATCH 15/27] add can_read classmethod to ZarrIO add test for new method --- src/hdmf_zarr/backend.py | 8 ++++++++ tests/unit/base_tests_zarrio.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 7c364864..fd3579a2 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -75,6 +75,14 @@ class ZarrIO(HDMFIO): + @classmethod + def can_read(cls, path): + try: + zarr.open(path, mode="r") + return True + except Exception: + return False + @docval({'name': 'path', 'type': (str, *SUPPORTED_ZARR_STORES), 'doc': 'the path to the Zarr file or a supported Zarr store'}, diff --git a/tests/unit/base_tests_zarrio.py b/tests/unit/base_tests_zarrio.py index 3e157413..9319928a 100644 --- a/tests/unit/base_tests_zarrio.py +++ b/tests/unit/base_tests_zarrio.py @@ -170,6 +170,9 @@ def createReferenceCompoundBuilder(self): 'ref_dataset': dataset_ref}) return builder + def test_cannot_read(self): + assert not ZarrIO.can_read("incorrect_path") + def read_test_dataset(self): reader = ZarrIO(self.store, manager=self.manager, mode='r') self.root = reader.read_builder() @@ -209,6 +212,7 @@ def test_write_int(self, test_data=None): writer = ZarrIO(self.store, manager=self.manager, mode='a') writer.write_builder(self.builder) writer.close() + assert ZarrIO.can_read(self.store) def test_write_compound(self, test_data=None): """ @@ -295,6 +299,7 @@ def test_write_reference_compound(self): def test_read_int(self): test_data = np.arange(100, 200, 10).reshape(5, 2) self.test_write_int(test_data=test_data) + dataset = self.read_test_dataset()['data'][:] self.assertTrue(np.all(test_data == dataset)) From d4c196488590bd1060979d7ba9eec1a12b217b53 Mon Sep 17 00:00:00 2001 From: bendichter Date: Fri, 16 Jun 2023 20:32:51 -0400 Subject: [PATCH 16/27] change to static method --- src/hdmf_zarr/backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index fd3579a2..153b703b 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -75,8 +75,8 @@ class ZarrIO(HDMFIO): - @classmethod - def can_read(cls, path): + @staticmethod + def can_read(path): try: zarr.open(path, mode="r") return True From fdf5475fae8129b6e688963f41947fb2366fd6dc Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 16 Jun 2023 20:33:21 -0400 Subject: [PATCH 17/27] Update tests/unit/base_tests_zarrio.py --- tests/unit/base_tests_zarrio.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/base_tests_zarrio.py b/tests/unit/base_tests_zarrio.py index 9319928a..6b1c7005 100644 --- a/tests/unit/base_tests_zarrio.py +++ b/tests/unit/base_tests_zarrio.py @@ -299,7 +299,6 @@ def test_write_reference_compound(self): def test_read_int(self): test_data = np.arange(100, 200, 10).reshape(5, 2) self.test_write_int(test_data=test_data) - dataset = self.read_test_dataset()['data'][:] self.assertTrue(np.all(test_data == dataset)) From 26cad6393170be8dcc6ef14a53dc91674c5a0912 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sun, 18 Jun 2023 00:48:20 -0700 Subject: [PATCH 18/27] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index acb3b0fb..595f43a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ 
### Minor enhancements * Updated handling of references on read to simplify future integration of file-based Zarr stores (e.g., ZipStore or database stores) @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) +* Added can_read classmethod to ZarrIO. @bendichter [#97](https://github.com/hdmf-dev/hdmf-zarr/pull/97) ### Test suite enhancements * Modularized unit tests to simplify running tests for multiple Zarr storage backends From 6a3474eaeb083d9c580bfa75cdf3dd74a0ac9fdb Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sat, 1 Jul 2023 10:57:37 -0700 Subject: [PATCH 19/27] Add OtherIO.can_read method to tests (#102) --- CHANGELOG.md | 11 ++++++----- tests/unit/base_tests_zarrio.py | 4 ++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 595f43a5..055b5dcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,20 +4,21 @@ ### New Features * Added support, tests, and docs for using ``DirectoryStore``, ``TempStore``, and - ``NestedDirectoryStore`` Zarr storage backends with ``ZarrIO`` and ``NWBZarrIO`` + ``NestedDirectoryStore`` Zarr storage backends with ``ZarrIO`` and ``NWBZarrIO``. @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) ### Minor enhancements * Updated handling of references on read to simplify future integration of file-based Zarr - stores (e.g., ZipStore or database stores) @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) -* Added can_read classmethod to ZarrIO. @bendichter [#97](https://github.com/hdmf-dev/hdmf-zarr/pull/97) + stores (e.g., ZipStore or database stores). @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) +* Added ``can_read`` classmethod to ``ZarrIO``. @bendichter [#97](https://github.com/hdmf-dev/hdmf-zarr/pull/97) ### Test suite enhancements -* Modularized unit tests to simplify running tests for multiple Zarr storage backends +* Modularized unit tests to simplify running tests for multiple Zarr storage backends. @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) +* Updated tests to handle upcoming changes to ``HDMFIO``. @rly [#102](https://github.com/hdmf-dev/hdmf-zarr/pull/102) ### Docs -* Added developer documentation on how to integrate new storage backends with ZarrIO +* Added developer documentation on how to integrate new storage backends with ZarrIO. 
@oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) ### API Changes diff --git a/tests/unit/base_tests_zarrio.py b/tests/unit/base_tests_zarrio.py index 6b1c7005..8c06e44d 100644 --- a/tests/unit/base_tests_zarrio.py +++ b/tests/unit/base_tests_zarrio.py @@ -1434,6 +1434,10 @@ def test_non_manager_container(self): class OtherIO(HDMFIO): + @staticmethod + def can_read(path): + pass + def read_builder(self): pass From 13b92e8effc22bf201c4147f6fc0054d321a351f Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 10 Jul 2023 02:07:32 -0700 Subject: [PATCH 20/27] Update base_tests_zarrio.py (#103) --- tests/unit/base_tests_zarrio.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/base_tests_zarrio.py b/tests/unit/base_tests_zarrio.py index 8c06e44d..199ce7aa 100644 --- a/tests/unit/base_tests_zarrio.py +++ b/tests/unit/base_tests_zarrio.py @@ -1470,6 +1470,10 @@ class OtherIO(HDMFIO): def __init__(self, manager): super().__init__(manager=manager) + @staticmethod + def can_read(path): + pass + def read_builder(self): pass From 30219bad7035beeaf751ee24f9a1a869a831692c Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 12 Jul 2023 09:11:47 -0700 Subject: [PATCH 21/27] Fix testing of min and optional requirements (#99) * Fix testing of min and optional requirements * Update CHANGELOG.md * Update tox.ini * Ignore pkg_resources deprecation warning in test_gallery (#100) * Update test_gallery.py * Update requirements-min.txt * Update setup.py * Update requirements.txt * Use hdmf 3.5.4 only * Update tox.ini * Discard changes to .github/workflows/run_all_tests.yml --------- Co-authored-by: Matthew Avaylon --- CHANGELOG.md | 6 +++++- requirements-min.txt | 4 ++-- requirements.txt | 4 ++-- setup.py | 4 ++-- test_gallery.py | 16 ++++++++++++++++ tox.ini | 16 ++++++++++++---- 6 files changed, 39 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 055b5dcd..7554d4be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,11 @@ ### Test suite enhancements * Modularized unit tests to simplify running tests for multiple Zarr storage backends. @oruebel [#62](https://github.com/hdmf-dev/hdmf-zarr/pull/62) -* Updated tests to handle upcoming changes to ``HDMFIO``. @rly [#102](https://github.com/hdmf-dev/hdmf-zarr/pull/102) +* Fixed CI testing of minimum and optional installation requirement. @rly + [#99](https://github.com/hdmf-dev/hdmf-zarr/pull/99) +* Updated tests to handle upcoming changes to ``HDMFIO``. @rly + [#102](https://github.com/hdmf-dev/hdmf-zarr/pull/102) + ### Docs * Added developer documentation on how to integrate new storage backends with ZarrIO. 
@oruebel diff --git a/requirements-min.txt b/requirements-min.txt index 003d102b..f5de79ad 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,6 +1,6 @@ -hdmf==3.5.2 +hdmf==3.5.4 zarr==2.11.0 numcodecs==0.9.1 -pynwb==2.0.0 +pynwb==2.3.2 setuptools importlib_resources;python_version<'3.9' # Remove when python 3.9 becomes the new minimum diff --git a/requirements.txt b/requirements.txt index fe306dc3..6c5f1020 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # pinned dependencies to reproduce an entire development environment to use HDMF-ZARR -hdmf==3.5.2 +hdmf==3.5.4 zarr==2.11.0 -pynwb==2.3.0 +pynwb==2.3.2 numpy==1.23.5 numcodecs==0.11.0 diff --git a/setup.py b/setup.py index 1b6582d5..254bd0f5 100755 --- a/setup.py +++ b/setup.py @@ -17,11 +17,11 @@ reqs = [ - 'hdmf<=3.5.4, >=3.5.2', + 'hdmf==3.5.4', # temporary 'zarr>=2.11.0', 'numpy>=1.22, <1.24; python_version>"3.7"', 'numcodecs>=0.9.1', - 'pynwb>=2.0.0', + 'pynwb>=2.3.2', 'setuptools', ] diff --git a/test_gallery.py b/test_gallery.py index 731a9f0d..77613a89 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -23,6 +23,14 @@ def _import_from_file(script): spec.loader.exec_module(module) +_pkg_resources_warning_re = ( + "pkg_resources is deprecated as an API" +) + +_pkg_resources_declare_warning_re = ( + r"Deprecated call to `pkg_resources\.declare_namespace.*" +) + _numpy_warning_re = ( "numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192" ) @@ -111,6 +119,14 @@ def run_gallery_tests(): # against a different version of numpy than the one installed "ignore", message=_numpy_warning_re, category=RuntimeWarning ) + warnings.filterwarnings( + # this warning is triggered when downstream code such as pynwb uses pkg_resources>=5.13 + "ignore", message=_pkg_resources_warning_re, category=DeprecationWarning + ) + warnings.filterwarnings( + # this warning is triggered when downstream code such as pynwb uses pkg_resources>=5.13 + "ignore", message=_pkg_resources_declare_warning_re, category=DeprecationWarning + ) _import_from_file(script_abs) except Exception: print(traceback.format_exc()) diff --git a/tox.ini b/tox.ini index 69cf27bf..b79471ed 100644 --- a/tox.ini +++ b/tox.ini @@ -14,7 +14,7 @@ setenv = PYTHONDONTWRITEBYTECODE = 1 VIRTUALENV_python -m pip = 22.3.1 install_command = - python -m pip install -U {opts} {packages} + python -m pip install {opts} {packages} deps = -rrequirements-dev.txt @@ -37,7 +37,8 @@ basepython = python3.11 install_command = python -m pip install {opts} {packages} deps = - -rrequirements-dev.txt + {[testenv]deps} + # -rrequirements-opt.txt commands = {[testenv]commands} # Test with python 3.11; pinned dev and optional reqs; upgraded run reqs @@ -47,6 +48,7 @@ install_command = python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt + # -rrequirements-opt.txt commands = {[testenv]commands} # Test with python 3.11; pinned dev and optional reqs; upgraded, pre-release run reqs @@ -56,6 +58,7 @@ install_command = python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt + # -rrequirements-opt.txt commands = {[testenv]commands} # Test with python 3.7; pinned dev reqs; minimum run reqs @@ -95,7 +98,8 @@ commands = {[testenv:build]commands} [testenv:build-py311-optional] basepython = python3.11 deps = - -rrequirements-dev.txt + {[testenv]deps} + # -rrequirements-opt.txt commands = {[testenv:build]commands} [testenv:build-py311-upgraded] @@ -104,6 +108,7 @@ install_command = python -m pip install -U {opts} {packages} 
deps = -rrequirements-dev.txt + # -rrequirements-opt.txt commands = {[testenv:build]commands} [testenv:build-py311-prerelease] @@ -112,6 +117,7 @@ install_command = python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt + # -rrequirements-opt.txt commands = {[testenv:build]commands} [testenv:build-py37-minimum] @@ -129,7 +135,7 @@ commands = python -c "import hdmf_zarr" # Envs that will execute gallery tests [testenv:gallery] install_command = - python -m pip install -U {opts} {packages} + python -m pip install {opts} {packages} deps = -rrequirements-dev.txt @@ -167,6 +173,7 @@ install_command = deps = -rrequirements-dev.txt -rrequirements-doc.txt + # -rrequirements-opt.txt commands = {[testenv:gallery]commands} # Test with python 3.11; pinned dev, doc, and optional reqs; pre-release run reqs @@ -177,6 +184,7 @@ install_command = deps = -rrequirements-dev.txt -rrequirements-doc.txt + # -rrequirements-opt.txt commands = {[testenv:gallery]commands} # Test with python 3.7; pinned dev and doc reqs; minimum run reqs From 9ab601f12adc61efc8b254e5a29f8696210e542f Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Sun, 23 Jul 2023 09:12:25 -0700 Subject: [PATCH 22/27] Update CHANGELOG.md (#108) --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7554d4be..d0e627a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF-ZARR Changelog -## 0.3.0 (Upcoming) +## 0.3.0 (July 21, 2023) ### New Features * Added support, tests, and docs for using ``DirectoryStore``, ``TempStore``, and From d47fc8249e76a371fff990b07218a088e59ea590 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Sun, 23 Jul 2023 11:08:28 -0700 Subject: [PATCH 23/27] Update deploy_release.yml (#109) * Update deploy_release.yml * Update CHANGELOG.md --- .github/workflows/deploy_release.yml | 4 ++-- CHANGELOG.md | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 23337005..7a3e7399 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -28,11 +28,11 @@ jobs: - name: Run tox tests run: | - tox -e py310-upgraded + tox -e py311-upgraded - name: Build wheel and source distribution run: | - tox -e build-py310-upgraded + tox -e build-py311-upgraded ls -1 dist - name: Test installation from a wheel diff --git a/CHANGELOG.md b/CHANGELOG.md index d0e627a6..77f9d529 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # HDMF-ZARR Changelog +## 0.3.1 (Upcoming) + +### Bug fixes +* Fixed error in deploy workflow. 
@mavaylon1 [#109](https://github.com/hdmf-dev/hdmf-zarr/pull/109) + + ## 0.3.0 (July 21, 2023) ### New Features From 086c9bc0078b6defde7987672cddf91f4b4f1c95 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 18 Aug 2023 20:47:32 -0700 Subject: [PATCH 24/27] Update release.md (#107) --- .github/PULL_REQUEST_TEMPLATE/release.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 13893a09..a26798e0 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -1,4 +1,4 @@ -Prepare for release of HDMF [version] +Prepare for release of HDMF-Zarr [version] ### Before merging: - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, @@ -21,4 +21,4 @@ Prepare for release of HDMF [version] [GitHub releases page](https://github.com/hdmf-dev/hdmf-zarr/releases) with the changelog 3. Check that the readthedocs "latest" and "stable" builds run and succeed 4. Update [conda-forge/hdmf_zarr-feedstock](https://github.com/conda-forge/hdmf_zarr-feedstock) with the latest version number - and SHA256 retrieved from PyPI > HDMF > Download Files > View hashes for the `.tar.gz` file. Re-render as needed + and SHA256 retrieved from PyPI > HDMF-Zarr > Download Files > View hashes for the `.tar.gz` file. Re-render as needed From 6c13e14927eea985d53174d8580224c97d65707a Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Tue, 22 Aug 2023 10:50:17 -0700 Subject: [PATCH 25/27] numpy degrade for python 3.7 (#115) * numpy degrade for python 3.7 * Update setup.py * Update setup.py * Update requirements.txt * Update requirements.txt * Update CHANGELOG.md * Update CHANGELOG.md * Update requirements.txt * Update setup.py * Update requirements.txt * Update setup.py * Update setup.py --- CHANGELOG.md | 1 + requirements.txt | 6 ++++-- setup.py | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77f9d529..087b91bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Bug fixes * Fixed error in deploy workflow. @mavaylon1 [#109](https://github.com/hdmf-dev/hdmf-zarr/pull/109) +* Fixed build error for ReadtheDocs by degrading numpy for python 3.7 support. 
@mavaylon1 [#115](https://github.com/hdmf-dev/hdmf-zarr/pull/115) ## 0.3.0 (July 21, 2023) diff --git a/requirements.txt b/requirements.txt index 6c5f1020..20a92d6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,7 @@ hdmf==3.5.4 zarr==2.11.0 pynwb==2.3.2 -numpy==1.23.5 -numcodecs==0.11.0 +numpy==1.21; python_version < "3.8" +numpy==1.23; python_version >= "3.8" +numcodecs==0.10.2; python_version < "3.8" +numcodecs==0.11.0; python_version >= "3.8" diff --git a/setup.py b/setup.py index 254bd0f5..5b155ecb 100755 --- a/setup.py +++ b/setup.py @@ -19,8 +19,11 @@ reqs = [ 'hdmf==3.5.4', # temporary 'zarr>=2.11.0', - 'numpy>=1.22, <1.24; python_version>"3.7"', + 'numpy<1.22; python_version < "3.8"', + 'numpy>=1.22; python_version >= "3.8"', 'numcodecs>=0.9.1', + 'numcodecs==0.10.2; python_version < "3.8"', + 'numcodecs==0.11.0; python_version >= "3.8"', 'pynwb>=2.3.2', 'setuptools', ] From c26248194ca73edc8caf6082b097ddba70c8cdf4 Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Fri, 29 Sep 2023 16:21:13 -0700 Subject: [PATCH 26/27] Rework ZarrIO backend/Remove python3.7/Update HDMF and PyNWB min/Update workflows (#120) * Both table and dataset of references work * checkpoint * all tests pass * tests pass * ignore warning * clean up * versions * 3.8 min * 3.8 min * remove 3.7 * remove 3.7 * remove 3.7 * remove 3.7 * remove * chckpoint * Update .readthedocs.yaml * Update .readthedocs.yaml * doc build test * sanity check * sanity check doc build * Update .readthedocs.yaml * doc build * build doc * clean up * docs * Update src/hdmf_zarr/backend.py Co-authored-by: Oliver Ruebel * Update src/hdmf_zarr/backend.py Co-authored-by: Oliver Ruebel * Update src/hdmf_zarr/backend.py Co-authored-by: Oliver Ruebel * Update src/hdmf_zarr/backend.py Co-authored-by: Oliver Ruebel * Update src/hdmf_zarr/zarr_utils.py Co-authored-by: Oliver Ruebel * class docstrings * flake8/slight change to TableRef * first round clean up for review * Update CHANGELOG.md * clean up * Update requirements.txt * Update setup.py * clean up spacing * source * default * Update src/hdmf_zarr/backend.py Co-authored-by: Oliver Ruebel * Update CHANGELOG.md Co-authored-by: Oliver Ruebel * export_source * Update src/hdmf_zarr/backend.py Co-authored-by: Oliver Ruebel * clean up/adjust resolve_ref * Update storage.rst * note * Update run_all_tests.yml Co-authored-by: Ryan Ly * Update backend.py Co-authored-by: Ryan Ly * test format * Update CHANGELOG.md Co-authored-by: Oliver Ruebel * Update src/hdmf_zarr/backend.py Co-authored-by: Oliver Ruebel * feedback fix * tests * Update tests/unit/utils.py Co-authored-by: Oliver Ruebel * flake --------- Co-authored-by: Oliver Ruebel Co-authored-by: Ryan Ly --- .github/ISSUE_TEMPLATE/bug_report.yml | 1 - .github/workflows/run_all_tests.yml | 21 +- .github/workflows/run_tests.yml | 12 +- .readthedocs.yaml | 6 + CHANGELOG.md | 9 +- docs/gallery/plot_convert_nwb_hdf5.py | 22 +- docs/source/storage.rst | 10 +- pyproject.toml | 2 +- requirements-min.txt | 4 +- requirements.txt | 10 +- setup.py | 13 +- src/hdmf_zarr/backend.py | 223 ++++++++++---------- src/hdmf_zarr/zarr_utils.py | 288 ++++++++++++++++++++++++++ tests/unit/base_tests_zarrio.py | 2 +- tests/unit/test_io_convert.py | 250 +++++++++++++++++++++- tests/unit/utils.py | 8 +- tox.ini | 22 +- 17 files changed, 719 insertions(+), 184 deletions(-) create mode 100644 src/hdmf_zarr/zarr_utils.py diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index a1db846c..ca2a47cb 100644 --- 
a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -62,7 +62,6 @@ body: attributes: label: Python Version options: - - "3.7" - "3.8" - "3.9" - "3.10" diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index e58226f3..5f5810cd 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -22,24 +22,21 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } + - { name: linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } - { name: linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } - { name: linux-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: windows-latest } + - { name: windows-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: windows-latest } - { name: windows-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: windows-latest } - { name: windows-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: windows-latest } - { name: windows-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: windows-latest } - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: windows-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: windows-latest } - - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: macos-latest } + - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-latest } - { name: macos-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: macos-latest } - { name: macos-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: macos-latest } - { name: macos-python3.11 , 
test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } @@ -88,13 +85,13 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } - { name: linux-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } - - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } + - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } - { name: windows-gallery-python3.11-prerelease, test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } - - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: macos-latest } - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: @@ -132,8 +129,7 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: conda-linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } + - { name: conda-linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: conda-linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: conda-linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - { name: conda-linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } @@ -162,8 +158,7 @@ jobs: run: | conda config --set always_yes yes --set changeps1 no conda info - # the conda dependency resolution for tox under python 3.7 can install the wrong importlib_metadata - conda install -c conda-forge tox "importlib_metadata>4" + conda install -c conda-forge tox - name: Conda reporting run: | diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 857b4159..c30d8a3f 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -19,13 +19,13 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies 
that wheels should be uploaded as an artifact - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest , upload-wheels: true } - - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } + - { name: windows-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } - - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } + - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-latest } - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs @@ -76,9 +76,9 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: ubuntu-latest } - - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } + - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-gallery-python3.11-upgraded, test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: windows-latest } steps: - name: Cancel non-latest runs @@ -114,7 +114,7 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } + - { name: conda-linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 182f9e65..cabf84ab 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -5,6 +5,11 @@ # Required version: 2 +build: + os: ubuntu-20.04 + tools: + python: '3.9' + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/source/conf.py @@ -21,6 +26,7 @@ python: install: - requirements: requirements-doc.txt - requirements: requirements.txt + - path: . 
# path to the package relative to the root # Optionally include all submodules submodules: diff --git a/CHANGELOG.md b/CHANGELOG.md index 087b91bc..3ab43847 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ # HDMF-ZARR Changelog -## 0.3.1 (Upcoming) +## 0.4.0 (Upcoming) + +### Enhancements +* Enhanced ZarrIO to resolve object references lazily on read similar to HDMF's `HDF5IO` backend @mavaylon1 [#120](https://github.com/hdmf-dev/hdmf-zarr/pull/120) + +### Dependencies +* Updated HDMF and PyNWB version to the most recent release @mavaylon1 [#120](https://github.com/hdmf-dev/hdmf-zarr/pull/120) +* Updated minimum Python version from 3.7 to 3.8 @mavaylon1 [#120](https://github.com/hdmf-dev/hdmf-zarr/pull/120) ### Bug fixes * Fixed error in deploy workflow. @mavaylon1 [#109](https://github.com/hdmf-dev/hdmf-zarr/pull/109) diff --git a/docs/gallery/plot_convert_nwb_hdf5.py b/docs/gallery/plot_convert_nwb_hdf5.py index c26006f1..6565afe2 100644 --- a/docs/gallery/plot_convert_nwb_hdf5.py +++ b/docs/gallery/plot_convert_nwb_hdf5.py @@ -9,7 +9,6 @@ :py:class:`~ hdmf.backends.hdf5.h5tools.HDF5IO` HDF5 backend from HDMF for storage. """ - ############################################################################### # Setup # ----- @@ -31,11 +30,14 @@ # asset.download(filename) # # We here use a local copy of a small file from this DANDIset as an example: -# + # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_convert_nwb.png' import os import shutil +from pynwb import NWBHDF5IO +from hdmf_zarr.nwb import NWBZarrIO +from contextlib import suppress # Input file to convert basedir = "resources" @@ -62,9 +64,6 @@ # As this is an NWB file, we here use the :py:class:`pynwb.NWBHDF5IO` backend for reading the file from # from HDF5 and use the :py:class:`~hdmf_zarr.nwb.NWBZarrIO` backend to export the file to Zarr. -from pynwb import NWBHDF5IO -from hdmf_zarr.nwb import NWBZarrIO - with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io: # Create HDF5 IO object for read with NWBZarrIO(zarr_filename, mode='w') as export_io: # Create Zarr IO object for write export_io.export(src_io=read_io, write_args=dict(link_data=False)) # Export from HDF5 to Zarr @@ -77,7 +76,6 @@ # # Read the Zarr file back in # -------------------------- -# zr = NWBZarrIO(zarr_filename, 'r') zf = zr.read() @@ -107,9 +105,10 @@ # # Using the same approach as above, we can now convert our Zarr file back to HDF5. -with NWBZarrIO(zarr_filename, mode='r') as read_io: # Create Zarr IO object for read - with NWBHDF5IO(hdf_filename, 'w') as export_io: # Create HDF5 IO object for write - export_io.export(src_io=read_io, write_args=dict(link_data=False)) # Export from Zarr to HDF5 +with suppress(Exception): # TODO: This is a temporary ignore on the convert_dtype exception. + with NWBZarrIO(zarr_filename, mode='r') as read_io: # Create Zarr IO object for read + with NWBHDF5IO(hdf_filename, 'w') as export_io: # Create HDF5 IO object for write + export_io.export(src_io=read_io, write_args=dict(link_data=False)) # Export from Zarr to HDF5 ############################################################################### # Read the new HDF5 file back @@ -118,5 +117,6 @@ # Now our file has been converted from HDF5 to Zarr and back again to HDF5. # Here we check that we can still read that file. -with NWBHDF5IO(hdf_filename, 'r') as hr: - hf = hr.read() +with suppress(Exception): # TODO: This is a temporary ignore on the convert_dtype exception. 
+ with NWBHDF5IO(hdf_filename, 'r') as hr: + hf = hr.read() diff --git a/docs/source/storage.rst b/docs/source/storage.rst index b391dd12..1cb98576 100644 --- a/docs/source/storage.rst +++ b/docs/source/storage.rst @@ -296,7 +296,11 @@ store the definition of the ``region`` that is being referenced, e.g., a slice o 4) :py:meth:`~hdmf_zarr.backend.ZarrIO.__read_dataset` to support reading region references, which may also require updates to :py:meth:`~hdmf_zarr.backend.ZarrIO.__parse_ref` and :py:meth:`~hdmf_zarr.backend.ZarrIO.__resolve_ref`, and - 5) and possibly other parts of :py:class:`~hdmf_zarr.backend.ZarrIO` + 5) and possibly other parts of :py:class:`~hdmf_zarr.backend.ZarrIO`. + 6) The py:class:`~hdmf_zarr.zarr_utils.ContainerZarrRegionDataset` and + py:class:`~hdmf_zarr.zarr_utils.ContainerZarrRegionDataset` classes will also need to be finalized + to support region references. + .. _sec-zarr-storage-dtypes: @@ -379,7 +383,3 @@ data type. The specification of the namespace is stored in ``/specifications///``. Here ```` refers to the main name of the source-file without file extension (e.g., the core namespace defines ``nwb.ephys.yaml`` as source which would be stored in ``/specifications/core/2.0.1/nwb.ecephys``). - - - - diff --git a/pyproject.toml b/pyproject.toml index b2d9e782..ad47b0e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.black] line-length = 120 -target-version = ['py37'] +target-version = ['py38'] include = '\.pyi?$' extend-exclude = ''' /( diff --git a/requirements-min.txt b/requirements-min.txt index f5de79ad..4695e8f3 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,6 +1,6 @@ -hdmf==3.5.4 +hdmf==3.9.0 zarr==2.11.0 numcodecs==0.9.1 -pynwb==2.3.2 +pynwb==2.5.0 setuptools importlib_resources;python_version<'3.9' # Remove when python 3.9 becomes the new minimum diff --git a/requirements.txt b/requirements.txt index 20a92d6d..b6eb9731 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ # pinned dependencies to reproduce an entire development environment to use HDMF-ZARR -hdmf==3.5.4 +hdmf==3.9.0 zarr==2.11.0 -pynwb==2.3.2 -numpy==1.21; python_version < "3.8" -numpy==1.23; python_version >= "3.8" -numcodecs==0.10.2; python_version < "3.8" -numcodecs==0.11.0; python_version >= "3.8" +pynwb==2.5.0 +numpy==1.24 +numcodecs==0.11.0 diff --git a/setup.py b/setup.py index 5b155ecb..3556fcf9 100755 --- a/setup.py +++ b/setup.py @@ -17,14 +17,12 @@ reqs = [ - 'hdmf==3.5.4', # temporary + 'hdmf>=3.9.0', 'zarr>=2.11.0', - 'numpy<1.22; python_version < "3.8"', - 'numpy>=1.22; python_version >= "3.8"', + 'numpy>=1.24', 'numcodecs>=0.9.1', - 'numcodecs==0.10.2; python_version < "3.8"', - 'numcodecs==0.11.0; python_version >= "3.8"', - 'pynwb>=2.3.2', + 'numcodecs==0.11.0', + 'pynwb>=2.5.0', 'setuptools', ] @@ -45,10 +43,9 @@ 'packages': pkgs, 'package_dir': {'': 'src'}, 'package_data': {}, - 'python_requires': '>=3.7', + 'python_requires': '>=3.8', 'classifiers': [ "Programming Language :: Python", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 153b703b..688d387d 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -1,8 +1,6 @@ """Module with the Zarr-based I/O-backend for HDMF""" # Python imports import os -import itertools -from copy import deepcopy import warnings import numpy as np import tempfile @@ -23,6 
+21,7 @@ ZarrSpecWriter, ZarrSpecReader, ZarrIODataChunkIteratorQueue) +from .zarr_utils import BuilderZarrReferenceDataset, BuilderZarrTableDataset # HDMF imports from hdmf.backends.io import HDMFIO @@ -243,7 +242,7 @@ def export(self, **kwargs): raise UnsupportedOperation("Cannot export from non-Zarr backend %s to Zarr with write argument " "link_data=True." % src_io.__class__.__name__) - # write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder + write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder ckwargs = kwargs.copy() ckwargs['write_args'] = write_args super().export(**ckwargs) @@ -293,21 +292,29 @@ def get_builder_disk_path(self, **kwargs): {'name': 'exhaust_dci', 'type': bool, 'doc': 'exhaust DataChunkIterators one at a time. If False, add ' + 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', - 'default': True}) + 'default': True}, + {'name': 'export_source', 'type': str, + 'doc': 'The source of the builders when exporting', 'default': None}) def write_builder(self, **kwargs): """Write a builder to disk""" - f_builder, link_data, exhaust_dci = getargs('builder', 'link_data', 'exhaust_dci', kwargs) + f_builder, link_data, exhaust_dci, export_source = getargs('builder', + 'link_data', + 'exhaust_dci', + 'export_source', + kwargs) for name, gbldr in f_builder.groups.items(): self.write_group(parent=self.__file, builder=gbldr, link_data=link_data, - exhaust_dci=exhaust_dci) + exhaust_dci=exhaust_dci, + export_source=export_source) for name, dbldr in f_builder.datasets.items(): self.write_dataset(parent=self.__file, builder=dbldr, link_data=link_data, - exhaust_dci=exhaust_dci) - self.write_attributes(self.__file, f_builder.attributes) + exhaust_dci=exhaust_dci, + export_source=export_source) + self.write_attributes(self.__file, f_builder.attributes) # the same as set_attributes in HDMF self.__dci_queue.exhaust_queue() # Write all DataChunkIterators that have been queued self._written_builders.set_written(f_builder) self.logger.debug("Done writing %s '%s' to path '%s'" % @@ -321,10 +328,17 @@ def write_builder(self, **kwargs): 'doc': 'exhaust DataChunkIterators one at a time. 
If False, add ' + 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', 'default': True}, + {'name': 'export_source', 'type': str, + 'doc': 'The source of the builders when exporting', 'default': None}, returns='the Group that was created', rtype='Group') def write_group(self, **kwargs): """Write a GroupBuider to file""" - parent, builder, link_data, exhaust_dci = getargs('parent', 'builder', 'link_data', 'exhaust_dci', kwargs) + parent, builder, link_data, exhaust_dci, export_source = getargs('parent', + 'builder', + 'link_data', + 'exhaust_dci', + 'export_source', + kwargs) if self.get_written(builder): group = parent[builder.name] else: @@ -344,7 +358,8 @@ def write_group(self, **kwargs): self.write_dataset(parent=group, builder=sub_builder, link_data=link_data, - exhaust_dci=exhaust_dci) + exhaust_dci=exhaust_dci, + export_source=export_source) # write all links (haven implemented) links = builder.links @@ -360,12 +375,14 @@ def write_group(self, **kwargs): @docval({'name': 'obj', 'type': (Group, Array), 'doc': 'the Zarr object to add attributes to'}, {'name': 'attributes', 'type': dict, - 'doc': 'a dict containing the attributes on the Group or Dataset, indexed by attribute name'}) + 'doc': 'a dict containing the attributes on the Group or Dataset, indexed by attribute name'}, + {'name': 'export_source', 'type': str, + 'doc': 'The source of the builders when exporting', 'default': None}) def write_attributes(self, **kwargs): """ Set (i.e., write) the attributes on a given Zarr Group or Array """ - obj, attributes = getargs('obj', 'attributes', kwargs) + obj, attributes, export_source = getargs('obj', 'attributes', 'export_source', kwargs) for key, value in attributes.items(): # Case 1: list, set, tuple type attributes if isinstance(value, (set, list, tuple)) or (isinstance(value, np.ndarray) and np.ndim(value) != 0): @@ -391,15 +408,16 @@ def write_attributes(self, **kwargs): raise TypeError(str(e) + " type=" + str(type(value)) + " data=" + str(value)) from e # Case 2: References elif isinstance(value, (Container, Builder, ReferenceBuilder)): - if isinstance(value, RegionBuilder): - type_str = 'region' - refs = self.__get_ref(value.builder) - elif isinstance(value, (ReferenceBuilder, Container, Builder)): + # TODO: Region References are not yet supported + # if isinstance(value, RegionBuilder): + # type_str = 'region' + # refs = self.__get_ref(value.builder) + if isinstance(value, (ReferenceBuilder, Container, Builder)): type_str = 'object' if isinstance(value, Builder): - refs = self.__get_ref(value) + refs = self.__get_ref(value, export_source) else: - refs = self.__get_ref(value.builder) + refs = self.__get_ref(value.builder, export_source) tmp = {'zarr_dtype': type_str, 'value': refs} obj.attrs[key] = tmp # Case 3: Scalar attributes @@ -498,13 +516,13 @@ def __is_ref(self, dtype): else: return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE - def __resolve_ref(self, zarr_ref): + def resolve_ref(self, zarr_ref): """ Get the full path to the object linked to by the zarr reference The function only constructs the links to the targe object, but it does not check if the object exists - :param zarr_ref: Dict with `source` and `path` keys or a `ZarrRefernce` object + :param zarr_ref: Dict with `source` and `path` keys or a `ZarrReference` object :return: 1) name of the target object 2) the target zarr object within the target file """ @@ -535,7 +553,7 @@ def __resolve_ref(self, zarr_ref): # Return the create path return 
target_name, target_zarr_obj - def __get_ref(self, ref_object): + def __get_ref(self, ref_object, export_source=None): """ Create a ZarrReference object that points to the given container @@ -566,8 +584,16 @@ def __get_ref(self, ref_object): source = (builder.source if (builder.source is not None and os.path.isdir(builder.source)) else self.source) + # Make the source relative to the current file - source = os.path.relpath(os.path.abspath(source), start=self.abspath) + # TODO: This check assumes that all links are internal links on export. + # Need to deal with external links on export. + if export_source is not None: + # Make sure the source of the reference is now towards the new file + # and not the original source when exporting. + source = '.' + else: + source = os.path.relpath(os.path.abspath(source), start=self.abspath) # Return the ZarrReference object return ZarrReference(source, path) @@ -693,9 +719,16 @@ def __setup_chunked_dataset__(cls, parent, name, data, options=None): 'default': True}, {'name': 'force_data', 'type': None, 'doc': 'Used internally to force the data being used when we have to load the data', 'default': None}, + {'name': 'export_source', 'type': str, + 'doc': 'The source of the builders when exporting', 'default': None}, returns='the Zarr array that was created', rtype=Array) def write_dataset(self, **kwargs): # noqa: C901 - parent, builder, link_data, exhaust_dci = getargs('parent', 'builder', 'link_data', 'exhaust_dci', kwargs) + parent, builder, link_data, exhaust_dci, export_source = getargs('parent', + 'builder', + 'link_data', + 'exhaust_dci', + 'export_source', + kwargs) force_data = getargs('force_data', kwargs) if self.get_written(builder): return None @@ -729,7 +762,7 @@ def write_dataset(self, **kwargs): # noqa: C901 elif isinstance(data, HDMFDataset): # If we have a dataset of containers we need to make the references to the containers if len(data) > 0 and isinstance(data[0], Container): - ref_data = [self.__get_ref(data[i]) for i in range(len(data))] + ref_data = [self.__get_ref(data[i], export_source=export_source) for i in range(len(data))] shape = (len(data), ) type_str = 'object' dset = parent.require_dataset(name, @@ -752,7 +785,8 @@ def write_dataset(self, **kwargs): # noqa: C901 dset = self.write_dataset(parent=parent, builder=builder, link_data=link_data, - force_data=data[:]) + force_data=data[:], + export_source=export_source) self._written_builders.set_written(builder) # record that the builder has been written # Write a compound dataset elif isinstance(options['dtype'], list): @@ -761,7 +795,7 @@ def write_dataset(self, **kwargs): # noqa: C901 for i, dts in enumerate(options['dtype']): if self.__is_ref(dts['dtype']): refs.append(i) - ref_tmp = self.__get_ref(data[0][i]) + ref_tmp = self.__get_ref(data[0][i], export_source=export_source) if isinstance(ref_tmp, ZarrReference): dts_str = 'object' else: @@ -783,29 +817,31 @@ def write_dataset(self, **kwargs): # noqa: C901 for j, item in enumerate(data): new_item = list(item) for i in refs: - new_item[i] = self.__get_ref(item[i]) + new_item[i] = self.__get_ref(item[i], export_source=export_source) dset[j] = new_item else: # write a compound datatype dset = self.__list_fill__(parent, name, data, options) # Write a dataset of references elif self.__is_ref(options['dtype']): - if isinstance(data, RegionBuilder): - shape = (1,) - type_str = 'region' - refs = self.__get_ref(data.builder, data.region) - elif isinstance(data, ReferenceBuilder): + # TODO Region references are not yet support, but 
here is how the code should look
+            # if isinstance(data, RegionBuilder):
+            #    shape = (1,)
+            #    type_str = 'region'
+            #    refs = self.__get_ref(data.builder, data.region)
+            if isinstance(data, ReferenceBuilder):
                 shape = (1,)
                 type_str = 'object'
-                refs = self.__get_ref(data.builder)
-            elif options['dtype'] == 'region':
-                shape = (len(data), )
-                type_str = 'region'
-                refs = [self.__get_ref(item.builder, item.region) for item in data]
+                refs = self.__get_ref(data.builder, export_source=export_source)
+            # TODO: Region References are not yet supported
+            # elif options['dtype'] == 'region':
+            #     shape = (len(data), )
+            #     type_str = 'region'
+            #     refs = [self.__get_ref(item.builder, item.region) for item in data]
             else:
                 shape = (len(data), )
                 type_str = 'object'
-                refs = [self.__get_ref(item) for item in data]
+                refs = [self.__get_ref(item, export_source=export_source) for item in data]

             dset = parent.require_dataset(name,
                                           shape=shape,
@@ -1037,6 +1073,34 @@ def __set_built(self, zarr_obj, builder):
             path = os.path.join(fpath, path)
         self.__built.setdefault(path, builder)

+    @docval({'name': 'zarr_obj', 'type': (Array, Group),
+             'doc': 'the Zarr object to get the corresponding Container/Data object for'})
+    def get_container(self, **kwargs):
+        """
+        Get the container for the corresponding Zarr Group or Dataset
+
+        :raises ValueError: When no builder has been constructed yet for the given Zarr object
+        """
+        zarr_obj = getargs('zarr_obj', kwargs)
+        builder = self.get_builder(zarr_obj)
+        container = self.manager.construct(builder)
+        return container  # TODO: This method should be moved to HDMFIO
+
+    @docval({'name': 'zarr_obj', 'type': (Array, Group),
+             'doc': 'the Zarr object to get the corresponding Builder object for'})
+    def get_builder(self, **kwargs):  # TODO: move this to HDMFIO (define skeleton in there at least)
+        """
+        Get the builder for the corresponding Group or Dataset
+
+        :raises ValueError: When no builder has been constructed
+        """
+        zarr_obj = kwargs['zarr_obj']
+        builder = self.__get_built(zarr_obj)
+        if builder is None:
+            msg = '%s has not been built' % (zarr_obj.name)
+            raise ValueError(msg)
+        return builder
+
     def __get_built(self, zarr_obj):
         """
         Look up a builder for the given zarr object
@@ -1092,7 +1156,7 @@ def __read_links(self, zarr_obj, parent):
             links = zarr_obj.attrs['zarr_link']
             for link in links:
                 link_name = link['name']
-                target_name, target_zarr_obj = self.__resolve_ref(link)
+                target_name, target_zarr_obj = self.resolve_ref(link)
                 # NOTE: __read_group and __read_dataset return the cached builders if the target has already been built
                 if isinstance(target_zarr_obj, Group):
                     builder = self.__read_group(target_zarr_obj, target_name)
@@ -1132,91 +1196,42 @@ def __read_dataset(self, zarr_obj, name):
         if dtype == 'scalar':
             data = zarr_obj[0]

-        obj_refs = False
-        reg_refs = False
-        has_reference = False
         if isinstance(dtype, list):
-            # compound data type
-            obj_refs = list()
-            reg_refs = list()
+            # Check compound dataset where one of the subsets contains references
+            has_reference = False
             for i, dts in enumerate(dtype):
-                if dts['dtype'] == DatasetBuilder.OBJECT_REF_TYPE:
-                    obj_refs.append(i)
-                    has_reference = True
-                elif dts['dtype'] == DatasetBuilder.REGION_REF_TYPE:
-                    reg_refs.append(i)
+                if dts['dtype'] in ['object', 'region']:  # check items for object reference
                     has_reference = True
-
+                    break
+            retrieved_dtypes = [dtype_dict['dtype'] for dtype_dict in dtype]
+            if has_reference:
+                # TODO: BuilderZarrTableDataset does not yet support region reference
+                data = BuilderZarrTableDataset(zarr_obj, self,
retrieved_dtypes) elif self.__is_ref(dtype): - # reference array - has_reference = True - if dtype == DatasetBuilder.OBJECT_REF_TYPE: - obj_refs = True - elif dtype == DatasetBuilder.REGION_REF_TYPE: - reg_refs = True - - if has_reference: - try: - # TODO Should implement a lazy way to evaluate references for Zarr - data = deepcopy(data[:]) - self.__parse_ref(kwargs['maxshape'], obj_refs, reg_refs, data) - except ValueError as e: - raise ValueError(str(e) + " zarr-name=" + str(zarr_obj.name) + " name=" + str(name)) + # Array of references + if dtype == 'object': + data = BuilderZarrReferenceDataset(data, self) + # TODO: Resolution of Region reference not yet supported by BuilderZarrRegionDataset + # elif dtype == 'region': + # data = BuilderZarrRegionDataset(data, self) kwargs['data'] = data if name is None: name = str(os.path.basename(zarr_obj.name)) - ret = DatasetBuilder(name, **kwargs) + ret = DatasetBuilder(name, **kwargs) # create builder object for dataset ret.location = self.get_zarr_parent_path(zarr_obj) self._written_builders.set_written(ret) # record that the builder has been written self.__set_built(zarr_obj, ret) return ret - def __parse_ref(self, shape, obj_refs, reg_refs, data): - corr = [] - obj_pos = [] - reg_pos = [] - for s in shape: - corr.append(range(s)) - corr = tuple(corr) - for c in itertools.product(*corr): - if isinstance(obj_refs, list): - for i in obj_refs: - t = list(c) - t.append(i) - obj_pos.append(t) - elif obj_refs: - obj_pos.append(list(c)) - if isinstance(reg_refs, list): - for i in reg_refs: - t = list(c) - t.append(i) - reg_pos.append(t) - elif reg_refs: - reg_pos.append(list(c)) - - for p in obj_pos: - o = data - for i in p: - o = o[i] - target_name, target_zarr_obj = self.__resolve_ref(o) - o = data - for i in range(0, len(p)-1): - o = data[p[i]] - if isinstance(target_zarr_obj, zarr.hierarchy.Group): - o[p[-1]] = self.__read_group(target_zarr_obj, target_name) - else: - o[p[-1]] = self.__read_dataset(target_zarr_obj, target_name) - def __read_attrs(self, zarr_obj): ret = dict() for k in zarr_obj.attrs.keys(): if k not in self.__reserve_attribute: v = zarr_obj.attrs[k] if isinstance(v, dict) and 'zarr_dtype' in v: - # TODO Is this the correct way to resolve references? 
if v['zarr_dtype'] == 'object': - target_name, target_zarr_obj = self.__resolve_ref(v['value']) + target_name, target_zarr_obj = self.resolve_ref(v['value']) if isinstance(target_zarr_obj, zarr.hierarchy.Group): ret[k] = self.__read_group(target_zarr_obj, target_name) else: diff --git a/src/hdmf_zarr/zarr_utils.py b/src/hdmf_zarr/zarr_utils.py new file mode 100644 index 00000000..b9717c09 --- /dev/null +++ b/src/hdmf_zarr/zarr_utils.py @@ -0,0 +1,288 @@ +""" +Utilities for the Zarr I/O backend, +e.g., for wrapping Zarr arrays on read, wrapping arrays for configuring write, or +writing the spec among others +""" +from abc import ABCMeta, abstractmethod +from copy import copy +import numpy as np + +from zarr import Array as ZarrArray + +from hdmf.build import DatasetBuilder +from hdmf.array import Array +from hdmf.query import HDMFDataset, ReferenceResolver, ContainerResolver, BuilderResolver +from hdmf.utils import docval, popargs, get_docval + + +class ZarrDataset(HDMFDataset): + """ + Extension of HDMFDataset to add Zarr compatibility + """ + + @docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'}, + {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}) + def __init__(self, **kwargs): + self.__io = popargs('io', kwargs) + super().__init__(**kwargs) + + @property + def io(self): + return self.__io + + @property + def shape(self): + return self.dataset.shape + + +class DatasetOfReferences(ZarrDataset, ReferenceResolver, metaclass=ABCMeta): + """ + An extension of the base ReferenceResolver class to add more abstract methods for + subclasses that will read Zarr references + """ + + @abstractmethod + def get_object(self, zarr_obj): + """ + A class that maps an Zarr object to a Builder or Container + """ + pass + + def invert(self): + """ + Return an object that defers reference resolution + but in the opposite direction. + """ + if not hasattr(self, '__inverted'): + cls = self.get_inverse_class() + docval = get_docval(cls.__init__) + kwargs = dict() + for arg in docval: + kwargs[arg['name']] = getattr(self, arg['name']) + self.__inverted = cls(**kwargs) + return self.__inverted + + def _get_ref(self, ref): + name, zarr_obj = self.io.resolve_ref(ref) # ref is a json dict containing the path to the object + return self.get_object(zarr_obj) + + def __iter__(self): + for ref in super().__iter__(): + yield self._get_ref(ref) + + def __next__(self): + return self._get_ref(super().__next__()) + + +class BuilderResolverMixin(BuilderResolver): # refactor to backend/utils.py + """ + A mixin for adding to Zarr reference-resolving types + the get_object method that returns Builders + """ + + def get_object(self, zarr_obj): + """ + A class that maps an Zarr object to a Builder + """ + return self.io.get_builder(zarr_obj) + + +class ContainerResolverMixin(ContainerResolver): # refactor to backend/utils.py + """ + A mixin for adding to Zarr reference-resolvinAbstractZarrReferenceDatasetg types + the get_object method that returns Containers + """ + + def get_object(self, zarr_obj): + """ + A class that maps an Zarr object to a Container + """ + return self.io.get_container(zarr_obj) + + +class AbstractZarrTableDataset(DatasetOfReferences): + """ + Extension of DatasetOfReferences to serve as the base class for resolving Zarr + references in compound datasets to either Builders and Containers. 
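+
+    References stored in the underlying Zarr array appear as JSON-style dicts holding the
+    path to the target object; they are resolved lazily through ``io.resolve_ref`` as rows
+    are accessed.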
+ """ + + @docval({'name': 'dataset', 'type': (np.ndarray, ZarrArray, Array), 'doc': 'the Zarr file lazily evaluate'}, + {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}, + {'name': 'types', 'type': (list, tuple), + 'doc': 'the list/tuple of reference types'}) + def __init__(self, **kwargs): + types = popargs('types', kwargs) + super().__init__(**kwargs) + self.__refgetters = dict() + for i, t in enumerate(types): + # if t is RegionReference: # TODO: Region References not yet supported + # self.__refgetters[i] = self.__get_regref + if t == DatasetBuilder.OBJECT_REF_TYPE: + self.__refgetters[i] = self._get_ref + elif t is str: + # we need this for when we read compound data types + # that have unicode sub-dtypes since Zarrpy does not + # store UTF-8 in compound dtypes + self.__refgetters[i] = self._get_utf + self.__types = types + tmp = list() + for i in range(len(self.dataset.dtype)): + sub = self.dataset.dtype[i] + if np.issubdtype(sub, np.dtype('O')): + tmp.append('object') + # TODO: Region References are not yet supported + if sub.metadata: + if 'vlen' in sub.metadata: + t = sub.metadata['vlen'] + if t is str: + tmp.append('utf') + elif t is bytes: + tmp.append('ascii') + else: + tmp.append(sub.type.__name__) + self.__dtype = tmp + + @property + def types(self): + return self.__types + + @property + def dtype(self): + return self.__dtype + + def __getitem__(self, arg): + rows = copy(super().__getitem__(arg)) + if np.issubdtype(type(arg), np.integer): + self.__swap_refs(rows) + else: + for row in rows: + self.__swap_refs(row) + return rows + + def __swap_refs(self, row): + for i in self.__refgetters: + getref = self.__refgetters[i] + row[i] = getref(row[i]) + + def _get_utf(self, string): + """ + Decode a dataset element to unicode + """ + return string.decode('utf-8') if isinstance(string, bytes) else string + + def __get_regref(self, ref): + obj = self._get_ref(ref) + return obj[ref] + + def resolve(self, manager): + return self[0:len(self)] + + def __iter__(self): + for i in range(len(self)): + yield self[i] + + +class AbstractZarrReferenceDataset(DatasetOfReferences): + """ + Extension of DatasetOfReferences to serve as the base class for resolving Zarr + references in datasets to either Builders and Containers. + """ + + def __getitem__(self, arg): + ref = super().__getitem__(arg) + if isinstance(ref, np.ndarray): + return [self._get_ref(x) for x in ref] + else: + return self._get_ref(ref) + + @property + def dtype(self): + return 'object' + + +class AbstractZarrRegionDataset(AbstractZarrReferenceDataset): + """ + Extension of DatasetOfReferences to serve as the base class for resolving Zarr + references in datasets to either Builders and Containers. + + Note: Region References are not yet supported. + """ + + def __getitem__(self, arg): + obj = super().__getitem__(arg) + ref = self.dataset[arg] + return obj[ref] + + @property + def dtype(self): + return 'region' + + +class ContainerZarrTableDataset(ContainerResolverMixin, AbstractZarrTableDataset): + """ + A reference-resolving dataset for resolving references inside tables + (i.e. compound dtypes) that returns resolved references as Containers + """ + + @classmethod + def get_inverse_class(cls): + return BuilderZarrTableDataset + + +class BuilderZarrTableDataset(BuilderResolverMixin, AbstractZarrTableDataset): + """ + A reference-resolving dataset for resolving references inside tables + (i.e. 
compound dtypes) that returns resolved references as Builders + """ + + @classmethod + def get_inverse_class(cls): + return ContainerZarrTableDataset + + +class ContainerZarrReferenceDataset(ContainerResolverMixin, AbstractZarrReferenceDataset): + """ + A reference-resolving dataset for resolving object references that returns + resolved references as Containers + """ + + @classmethod + def get_inverse_class(cls): + return BuilderZarrReferenceDataset + + +class BuilderZarrReferenceDataset(BuilderResolverMixin, AbstractZarrReferenceDataset): + """ + A reference-resolving dataset for resolving object references that returns + resolved references as Builders + """ + + @classmethod + def get_inverse_class(cls): + return ContainerZarrReferenceDataset + + +class ContainerZarrRegionDataset(ContainerResolverMixin, AbstractZarrRegionDataset): + """ + A reference-resolving dataset for resolving region references that returns + resolved references as Containers + + Note: Region References are not yet supported. + """ + + @classmethod + def get_inverse_class(cls): + return BuilderZarrRegionDataset + + +class BuilderZarrRegionDataset(BuilderResolverMixin, AbstractZarrRegionDataset): + """ + A reference-resolving dataset for resolving region references that returns + resolved references as Builders. + + Note: Region References are not yet supported. + """ + + @classmethod + def get_inverse_class(cls): + return ContainerZarrRegionDataset diff --git a/tests/unit/base_tests_zarrio.py b/tests/unit/base_tests_zarrio.py index 199ce7aa..b0853b06 100644 --- a/tests/unit/base_tests_zarrio.py +++ b/tests/unit/base_tests_zarrio.py @@ -228,7 +228,7 @@ def test_write_compound(self, test_data=None): {'name': 'name', 'dtype': str}] self.__dataset_builder = DatasetBuilder('my_data', data, dtype=data_type) self.createGroupBuilder() - writer = ZarrIO(self.store, manager=self.manager, mode='a') + writer = ZarrIO(self.store, manager=self.manager, mode='w') writer.write_builder(self.builder) writer.close() diff --git a/tests/unit/test_io_convert.py b/tests/unit/test_io_convert.py index 9f7f0439..fe75ab76 100644 --- a/tests/unit/test_io_convert.py +++ b/tests/unit/test_io_convert.py @@ -39,16 +39,19 @@ from hdmf_zarr.backend import (ZarrIO, ROOT_NAME) +from hdmf_zarr.zarr_utils import ContainerZarrReferenceDataset +from hdmf.backends.hdf5.h5_utils import ContainerH5ReferenceDataset from hdmf.backends.hdf5 import HDF5IO from hdmf.common import get_manager as get_hdmfcommon_manager from hdmf.testing import TestCase from hdmf.common import DynamicTable from hdmf.common import CSRMatrix -from tests.unit.utils import (Foo, - FooBucket, - FooFile, - get_foo_buildmanager) + + +from tests.unit.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, + Baz, BazData, BazBucket, get_baz_buildmanager, + BazCpdData, get_temp_filepath) from zarr.storage import (DirectoryStore, TempStore, @@ -104,8 +107,13 @@ class MixinTestCaseConvert(metaclass=ABCMeta): (Default=[None, ]) """ + REFERENCES = False + """ + Bool parameter passed to check for references. 
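+    Set to True in subclasses (e.g., the Baz dataset-of-references cases below) to enable
+    per-element reference checks on the exported container in test_export_roundtrip.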
+ """ + def get_manager(self): - raise NotImplementedError('Cannot run test unless get_manger is implemented') + raise NotImplementedError('Cannot run test unless get_manger is implemented') def setUp(self): self.__manager = self.get_manager() @@ -163,6 +171,20 @@ def test_export_roundtrip(self): container=container, write_path=write_path, export_path=export_path) + if self.REFERENCES: + if self.TARGET_FORMAT == "H5": + num_bazs = 10 + for i in range(num_bazs): + baz_name = 'baz%d' % i + self.assertIsInstance(exported_container.baz_data.data, ContainerH5ReferenceDataset) + self.assertIs(exported_container.baz_data.data[i], exported_container.bazs[baz_name]) + elif self.TARGET_FORMAT == "ZARR": + num_bazs = 10 + for i in range(num_bazs): + baz_name = 'baz%d' % i + self.assertIsInstance(exported_container.baz_data.data, ContainerZarrReferenceDataset) + self.assertIs(exported_container.baz_data.data[i], exported_container.bazs[baz_name]) + # assert that the roundtrip worked correctly message = "Using: write_path=%s, export_path=%s" % (str(write_path), str(export_path)) self.assertIsNotNone(str(container), message) # added as a test to make sure printing works @@ -178,7 +200,6 @@ def test_export_roundtrip(self): ignore_string_to_byte=self.IGNORE_STRING_TO_BYTE, message=message) self.close_files_and_ios() - # TODO: May need to add further asserts here ########################################################## @@ -196,6 +217,7 @@ class MixinTestHDF5ToZarr(): DirectoryStore('test_export_DirectoryStore.zarr'), TempStore(), NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')] + TARGET_FORMAT = "ZARR" def get_manager(self): return get_hdmfcommon_manager() @@ -226,13 +248,14 @@ class MixinTestZarrToHDF5(): TempStore(), NestedDirectoryStore('test_export_NestedDirectoryStore.zarr')] EXPORT_PATHS = [None, ] + TARGET_FORMAT = "H5" def get_manager(self): return get_hdmfcommon_manager() def roundtripExportContainer(self, container, write_path, export_path): with ZarrIO(write_path, manager=self.get_manager(), mode='w') as write_io: - write_io.write(container, cache_spec=True) + write_io.write(container) with ZarrIO(write_path, manager=self.get_manager(), mode='r') as read_io: with HDF5IO(export_path, mode='w') as export_io: @@ -259,6 +282,7 @@ class MixinTestZarrToZarr(): DirectoryStore('test_export_DirectoryStore_Export.zarr'), TempStore(dir=os.path.dirname(__file__)), # set dir to avoid switching drives on Windows NestedDirectoryStore('test_export_NestedDirectoryStore_Export.zarr')] + TARGET_FORMAT = "ZARR" def get_manager(self): return get_hdmfcommon_manager() @@ -379,6 +403,34 @@ def setUpContainer(self): raise NotImplementedError("FOO_TYPE %i not implemented in test" % self.FOO_TYPE) +######################################## +# HDMF Baz test dataset of references +######################################## +class MixinTestBaz1(): + """ + Mixin class used in conjunction with MixinTestCaseConvert to test a dataset of references. + + Mixin class used in conjunction with MixinTestCaseConvert to create conversion tests that + test export of a dataset of references. This class only defines the setUpContainer + and get_manager functions. The roundtripExportContainer function required for + the test needs to be defined separately, e.g., MixinTestZarrToHDF5, MixinTestHDF5ToZarr, + or MixinTestZarrToZarr. 
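+
+    For example, a concrete test case is assembled as
+    ``class TestZarrToHDF5Baz1(MixinTestBaz1, MixinTestZarrToHDF5, MixinTestCaseConvert, TestCase)``.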
+ """ + def get_manager(self): + return get_baz_buildmanager() + + def setUpContainer(self): + num_bazs = 10 + # set up dataset of references + bazs = [] + for i in range(num_bazs): + bazs.append(Baz(name='baz%d' % i)) + baz_data = BazData(name='baz_data1', data=bazs) + + bucket = BazBucket(bazs=bazs, baz_data=baz_data) + return bucket + + ######################################## # Actual test cases for conversion ######################################## @@ -589,6 +641,190 @@ class TestHDF5toZarrFooCase2(MixinTestFoo, FOO_TYPE = MixinTestFoo.FOO_TYPES['link_data'] +######################################## +# Test cases for dataset of references +######################################## +class TestZarrToHDF5Baz1(MixinTestBaz1, + MixinTestZarrToHDF5, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of a BazBucket containing a dataset of references from Zarr to HDF5 + See MixinTestBaz1.setUpContainer for the container spec used. + """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = True + REFERENCES = True + + +class TestHDF5toZarrBaz1(MixinTestBaz1, + MixinTestHDF5ToZarr, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of a BazBucket containing a dataset of references from HDF5 to Zarr + See MixinTestBaz1.setUpContainer for the container spec used. + """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = True + REFERENCES = True + + +class TestZarrtoZarrBaz1(MixinTestBaz1, + MixinTestZarrToZarr, + MixinTestCaseConvert, + TestCase): + """ + Test the conversion of a BazBucket containing a dataset of references from Zarr to Zarr + See MixinTestBaz1.setUpContainer for the container spec used. + """ + IGNORE_NAME = True + IGNORE_HDMF_ATTRS = True + IGNORE_STRING_TO_BYTE = True + REFERENCES = True + + +################################################## +# Test cases for compound dataset of references +################################################## +class TestHDF5ToZarrCPD(TestCase): + """ + This class helps with making the test suit more readable, testing the roundtrip for compound + datasets that have references from HDF5 to Zarr. 
+ """ + def test_export_cpd_dset_refs(self): + self.path = [get_temp_filepath() for i in range(2)] + + """Test that exporting a written container with a compound dataset with references works.""" + bazs = [] + baz_pairs = [] + num_bazs = 10 + for i in range(num_bazs): + b = Baz(name='baz%d' % i) + bazs.append(b) + baz_pairs.append((i, b)) + baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=baz_pairs) + bucket = BazBucket(name='root', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data) + + with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with HDF5IO(self.path[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket1 = read_io.read() + + # NOTE: reference IDs might be the same between two identical files + # adding a Baz with a smaller name should change the reference IDs on export + new_baz = Baz(name='baz000') + read_bucket1.add_baz(new_baz) + + with ZarrIO(self.path[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_bucket1, write_args=dict(link_data=False)) + + with ZarrIO(self.path[1], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket2 = read_io.read() + # remove and check the appended child, then compare the read container with the original + read_new_baz = read_bucket2.remove_baz(new_baz.name) + + self.assertContainerEqual(new_baz, read_new_baz, ignore_hdmf_attrs=True) + self.assertContainerEqual(bucket, read_bucket2, ignore_name=True, ignore_hdmf_attrs=True) + for i in range(num_bazs): + baz_name = 'baz%d' % i + self.assertEqual(read_bucket2.baz_cpd_data.data[i][0], i) + self.assertIs(read_bucket2.baz_cpd_data.data[i][1], read_bucket2.bazs[baz_name]) + + +class TestZarrToHDF5CPD(TestCase): + """ + This class helps with making the test suit more readable, testing the roundtrip for compound + datasets that have references from Zarr to HDF5. 
+ """ + def test_export_cpd_dset_refs(self): + self.path = [get_temp_filepath() for i in range(2)] + """Test that exporting a written container with a compound dataset with references works.""" + bazs = [] + baz_pairs = [] + num_bazs = 10 + for i in range(num_bazs): + b = Baz(name='baz%d' % i) + bazs.append(b) + baz_pairs.append((i, b)) + baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=baz_pairs) + bucket = BazBucket(name='root', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data) + + with ZarrIO(self.path[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + + with ZarrIO(self.path[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket1 = read_io.read() + + # NOTE: reference IDs might be the same between two identical files + # adding a Baz with a smaller name should change the reference IDs on export + new_baz = Baz(name='baz000') + read_bucket1.add_baz(new_baz) + + with HDF5IO(self.path[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_bucket1, write_args=dict(link_data=False)) + + with HDF5IO(self.path[1], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket2 = read_io.read() + + # remove and check the appended child, then compare the read container with the original + read_new_baz = read_bucket2.remove_baz(new_baz.name) + self.assertContainerEqual(new_baz, read_new_baz, ignore_hdmf_attrs=True) + self.assertContainerEqual(bucket, read_bucket2, ignore_name=True, ignore_hdmf_attrs=True) + for i in range(num_bazs): + baz_name = 'baz%d' % i + self.assertEqual(read_bucket2.baz_cpd_data.data[i][0], i) + self.assertIs(read_bucket2.baz_cpd_data.data[i][1], read_bucket2.bazs[baz_name]) + + +class TestZarrToZarrCPD(TestCase): + """ + This class helps with making the test suit more readable, testing the roundtrip for compound + datasets that have references from Zarr to Zarr. 
+ """ + def test_export_cpd_dset_refs(self): + self.path = [get_temp_filepath() for i in range(2)] + + """Test that exporting a written container with a compound dataset with references works.""" + bazs = [] + baz_pairs = [] + num_bazs = 10 + for i in range(num_bazs): + b = Baz(name='baz%d' % i) + bazs.append(b) + baz_pairs.append((i, b)) + baz_cpd_data = BazCpdData(name='baz_cpd_data1', data=baz_pairs) + bucket = BazBucket(name='root', bazs=bazs.copy(), baz_cpd_data=baz_cpd_data) + + with ZarrIO(self.path[0], manager=get_baz_buildmanager(), mode='w') as write_io: + write_io.write(bucket) + with ZarrIO(self.path[0], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket1 = read_io.read() + read_bucket1.baz_cpd_data.data[0][0] + # NOTE: reference IDs might be the same between two identical files + # adding a Baz with a smaller name should change the reference IDs on export + new_baz = Baz(name='baz000') + read_bucket1.add_baz(new_baz) + + with ZarrIO(self.path[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_bucket1, write_args=dict(link_data=False)) + + with ZarrIO(self.path[1], manager=get_baz_buildmanager(), mode='r') as read_io: + read_bucket2 = read_io.read() + # remove and check the appended child, then compare the read container with the original + read_new_baz = read_bucket2.remove_baz(new_baz.name) + self.assertContainerEqual(new_baz, read_new_baz, ignore_hdmf_attrs=True) + + self.assertContainerEqual(bucket, read_bucket2, ignore_name=True, ignore_hdmf_attrs=True) + for i in range(num_bazs): + baz_name = 'baz%d' % i + self.assertEqual(read_bucket2.baz_cpd_data.data[i][0], i) + self.assertIs(read_bucket2.baz_cpd_data.data[i][1], read_bucket2.bazs[baz_name]) + + # TODO: Fails because we need to copy the data from the ExternalLink as it points to a non-Zarr source """ class TestFooExternalLinkHDF5ToZarr(MixinTestCaseConvert, TestCase): diff --git a/tests/unit/utils.py b/tests/unit/utils.py index 64ccc4af..67f2e8e0 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -9,6 +9,7 @@ SpecNamespace, NamespaceBuilder) from hdmf.spec.spec import (ZERO_OR_MANY, ONE_OR_MANY, ZERO_OR_ONE) from hdmf.utils import (docval, getargs, get_docval) +from hdmf_zarr.backend import ROOT_NAME CORE_NAMESPACE = 'test_core' @@ -113,8 +114,6 @@ class FooFile(Container): and should be reset to 'root' when use is finished to avoid potential cross-talk between tests. """ - ROOT_NAME = 'root' # For HDF5 and Zarr this is the root. It should be set before use if different for the backend. 
- @docval({'name': 'buckets', 'type': list, 'doc': 'the FooBuckets in this file', 'default': list()}, {'name': 'foo_link', 'type': Foo, 'doc': 'an optional linked Foo', 'default': None}, {'name': 'foofile_data', 'type': 'array_data', 'doc': 'an optional dataset', 'default': None}, @@ -123,7 +122,7 @@ class FooFile(Container): def __init__(self, **kwargs): buckets, foo_link, foofile_data, foo_ref_attr = getargs('buckets', 'foo_link', 'foofile_data', 'foo_ref_attr', kwargs) - super().__init__(name=self.ROOT_NAME) # name is not used - FooFile should be the root container + super().__init__(name=ROOT_NAME) # name is not used - FooFile should be the root container self.__buckets = {b.name: b for b in buckets} # note: collections of groups are unordered in HDF5 for f in buckets: f.parent = self @@ -306,8 +305,7 @@ class BazCpdData(Data): class BazBucket(Container): - - @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'}, + @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket', 'default': ROOT_NAME}, {'name': 'bazs', 'type': list, 'doc': 'the Baz objects in this bucket'}, {'name': 'baz_data', 'type': BazData, 'doc': 'dataset of Baz references', 'default': None}, {'name': 'baz_cpd_data', 'type': BazCpdData, 'doc': 'dataset of Baz references', 'default': None}) diff --git a/tox.ini b/tox.ini index b79471ed..6934d6e4 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py37, py38, py39, py310, py311 +envlist = py38, py39, py310, py311 requires = pip >= 22.0 [testenv] @@ -61,9 +61,9 @@ deps = # -rrequirements-opt.txt commands = {[testenv]commands} -# Test with python 3.7; pinned dev reqs; minimum run reqs -[testenv:py37-minimum] -basepython = python3.7 +# Test with python 3.8; pinned dev reqs; minimum run reqs +[testenv:py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt @@ -75,10 +75,6 @@ commands = python -m pip install --upgrade build python -m build -[testenv:build-py37] -basepython = python3.7 -commands = {[testenv:build]commands} - [testenv:build-py38] basepython = python3.8 commands = {[testenv:build]commands} @@ -120,8 +116,8 @@ deps = # -rrequirements-opt.txt commands = {[testenv:build]commands} -[testenv:build-py37-minimum] -basepython = python3.7 +[testenv:build-py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt @@ -187,9 +183,9 @@ deps = # -rrequirements-opt.txt commands = {[testenv:gallery]commands} -# Test with python 3.7; pinned dev and doc reqs; minimum run reqs -[testenv:gallery-py37-minimum] -basepython = python3.7 +# Test with python 3.8; pinned dev and doc reqs; minimum run reqs +[testenv:gallery-py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt From 9f6c386c9c9fe3077bbb666ff5225f7ee476b36e Mon Sep 17 00:00:00 2001 From: Cody Baker <51133164+CodyCBakerPhD@users.noreply.github.com> Date: Sun, 1 Oct 2023 05:13:47 -0400 Subject: [PATCH 27/27] Parallel zarr with Queue instance attributes (#118) * Added on-node parallel write support for the ``ZarrIO``. 
* add optional requirements and some test integration for that * readthedocs optional requirements * update coverage CI with optional requirements * update external links CI with optional requirements * modular scope on parallel helpers Co-authored-by: Ryan Ly Co-authored-by: Oliver Ruebel --- .github/workflows/check_external_links.yml | 2 +- .github/workflows/run_coverage.yml | 2 +- .readthedocs.yaml | 1 + CHANGELOG.md | 3 + MANIFEST.in | 2 +- requirements-min.txt | 1 + requirements-opt.txt | 1 + requirements.txt | 3 +- setup.py | 2 + src/hdmf_zarr/backend.py | 220 +++++++++++----- src/hdmf_zarr/utils.py | 290 +++++++++++++++++++-- tests/unit/test_parallel_write.py | 267 +++++++++++++++++++ tox.ini | 17 +- 13 files changed, 713 insertions(+), 98 deletions(-) create mode 100644 requirements-opt.txt create mode 100644 tests/unit/test_parallel_write.py diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index 1fbf0ee0..109446ad 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -27,7 +27,7 @@ jobs: - name: Install Sphinx dependencies and package run: | python -m pip install --upgrade pip - python -m pip install -r requirements-doc.txt -r requirements.txt + python -m pip install -r requirements-doc.txt -r requirements.txt -r requirements-opt.txt python -m pip install . - name: Check Sphinx external links run: sphinx-build -b linkcheck ./docs/source ./test_build diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index 142b0868..becadc4c 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -48,7 +48,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install -r requirements-dev.txt -r requirements.txt + python -m pip install -r requirements-dev.txt -r requirements.txt -r requirements-opt.txt - name: Install package run: | diff --git a/.readthedocs.yaml b/.readthedocs.yaml index cabf84ab..f57db3ed 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -26,6 +26,7 @@ python: install: - requirements: requirements-doc.txt - requirements: requirements.txt + - requirements: requirements-opt.txt - path: . # path to the package relative to the root # Optionally include all submodules diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ab43847..c9c6e89f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ * Fixed error in deploy workflow. @mavaylon1 [#109](https://github.com/hdmf-dev/hdmf-zarr/pull/109) * Fixed build error for ReadtheDocs by degrading numpy for python 3.7 support. @mavaylon1 [#115](https://github.com/hdmf-dev/hdmf-zarr/pull/115) +### New Features +* Added parallel write support for the ``ZarrIO``. 
@CodyCBakerPhD [#118](https://github.com/hdmf-dev/hdmf-zarr/pull/118) + ## 0.3.0 (July 21, 2023) diff --git a/MANIFEST.in b/MANIFEST.in index 783dea68..de5b2302 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ include LICENSE.txt versioneer.py src/hdmf_zarr/_version.py src/hdmf_zarr/_due.py -include requirements.txt requirements-dev.txt requirements-doc.txt +include requirements.txt requirements-dev.txt requirements-doc.txt requirements-opt.txt include test.py tox.ini graft tests diff --git a/requirements-min.txt b/requirements-min.txt index 4695e8f3..c452e4c5 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -4,3 +4,4 @@ numcodecs==0.9.1 pynwb==2.5.0 setuptools importlib_resources;python_version<'3.9' # Remove when python 3.9 becomes the new minimum +threadpoolctl==3.1.0 diff --git a/requirements-opt.txt b/requirements-opt.txt new file mode 100644 index 00000000..101e7d7b --- /dev/null +++ b/requirements-opt.txt @@ -0,0 +1 @@ +tqdm==4.65.0 diff --git a/requirements.txt b/requirements.txt index b6eb9731..edd4c45d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,6 @@ hdmf==3.9.0 zarr==2.11.0 pynwb==2.5.0 -numpy==1.24 +numpy==1.24.0 numcodecs==0.11.0 +threadpoolctl==3.2.0 diff --git a/setup.py b/setup.py index 3556fcf9..4ff0eb53 100755 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ 'numcodecs==0.11.0', 'pynwb>=2.5.0', 'setuptools', + 'threadpoolctl>=3.1.0', ] print(reqs) @@ -40,6 +41,7 @@ 'url': 'https://github.com/hdmf-dev/hdmf-zarr', 'license': "BSD", 'install_requires': reqs, + 'extras_require': {"tqdm": ["tqdm>=4.41.0"]}, 'packages': pkgs, 'package_dir': {'': 'src'}, 'package_data': {}, diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 688d387d..dd8b93ad 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -114,7 +114,7 @@ def __init__(self, **kwargs): self.__file = None self.__built = dict() self._written_builders = WriteStatusTracker() # track which builders were written (or read) by this IO object - self.__dci_queue = ZarrIODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted + self.__dci_queue = None # Will be initialized on call to io.write # Codec class to be used. Alternates, e.g., =numcodecs.JSON self.__codec_cls = numcodecs.pickles.Pickle if object_codec_class is None else object_codec_class source_path = self.__path @@ -188,17 +188,54 @@ def load_namespaces(cls, namespace_catalog, path, namespaces=None): reader = ZarrSpecReader(ns_group) namespace_catalog.load_namespaces('namespace', reader=reader) - @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, - {'name': 'cache_spec', 'type': bool, 'doc': 'cache specification to file', 'default': True}, - {'name': 'link_data', 'type': bool, - 'doc': 'If not specified otherwise link (True) or copy (False) Datasets', 'default': True}, - {'name': 'exhaust_dci', 'type': bool, - 'doc': 'exhaust DataChunkIterators one at a time. If False, add ' + - 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', - 'default': True},) + @docval( + {'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, + {'name': 'cache_spec', 'type': bool, 'doc': 'cache specification to file', 'default': True}, + {'name': 'link_data', 'type': bool, + 'doc': 'If not specified otherwise link (True) or copy (False) Datasets', 'default': True}, + {'name': 'exhaust_dci', 'type': bool, + 'doc': 'exhaust DataChunkIterators one at a time. 
If False, add ' + + 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', + 'default': True}, + { + "name": "number_of_jobs", + "type": int, + "doc": ( + "Number of jobs to use in parallel during write " + "(only works with GenericDataChunkIterator-wrapped datasets)." + ), + "default": 1, + }, + { + "name": "max_threads_per_process", + "type": int, + "doc": ( + "Limits the number of threads used by each process. The default is None (no limits)." + ), + "default": None, + }, + { + "name": "multiprocessing_context", + "type": str, + "doc": ( + "Context for multiprocessing. It can be None (default), 'fork' or 'spawn'. " + "Note that 'fork' is only available on UNIX systems (not Windows)." + ), + "default": None, + }, + ) def write(self, **kwargs): - """Overwrite the write method to add support for caching the specification""" - cache_spec = popargs('cache_spec', kwargs) + """Overwrite the write method to add support for caching the specification and parallelization.""" + cache_spec, number_of_jobs, max_threads_per_process, multiprocessing_context = popargs( + "cache_spec", "number_of_jobs", "max_threads_per_process", "multiprocessing_context", kwargs + ) + + self.__dci_queue = ZarrIODataChunkIteratorQueue( + number_of_jobs=number_of_jobs, + max_threads_per_process=max_threads_per_process, + multiprocessing_context=multiprocessing_context, + ) + super(ZarrIO, self).write(**kwargs) if cache_spec: self.__cache_spec() @@ -225,8 +262,36 @@ def __cache_spec(self): writer = ZarrSpecWriter(ns_group) ns_builder.export('namespace', writer=writer) - @docval(*get_docval(HDMFIO.export), - {'name': 'cache_spec', 'type': bool, 'doc': 'whether to cache the specification to file', 'default': True}) + @docval( + *get_docval(HDMFIO.export), + {'name': 'cache_spec', 'type': bool, 'doc': 'whether to cache the specification to file', 'default': True}, + { + "name": "number_of_jobs", + "type": int, + "doc": ( + "Number of jobs to use in parallel during write " + "(only works with GenericDataChunkIterator-wrapped datasets)." + ), + "default": 1, + }, + { + "name": "max_threads_per_process", + "type": int, + "doc": ( + "Limits the number of threads used by each process. The default is None (no limits)." + ), + "default": None, + }, + { + "name": "multiprocessing_context", + "type": str, + "doc": ( + "Context for multiprocessing. It can be None (default), 'fork' or 'spawn'. " + "Note that 'fork' is only available on UNIX systems (not Windows)." + ), + "default": None, + }, + ) def export(self, **kwargs): """Export data read from a file from any backend to Zarr. See :py:meth:`hdmf.backends.io.HDMFIO.export` for more details. 
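A minimal usage sketch of the new parallel-write keywords follows (based on the ``write`` and
``export`` docvals above and on the unit tests added in this patch; ``PickleableDataChunkIterator``
is a stand-in for any pickle-able ``GenericDataChunkIterator`` subclass, such as the one defined in
``tests/unit/test_parallel_write.py``, and the output path is arbitrary):

    import numpy as np
    from hdmf.common import DynamicTable, VectorData, get_manager
    from hdmf_zarr import ZarrIO

    # Only datasets wrapped in a pickle-able GenericDataChunkIterator are written in parallel;
    # all other datasets fall back to the regular serial write path.
    column = VectorData(
        name="TestColumn",
        description="",
        data=PickleableDataChunkIterator(data=np.array([1., 2., 3.])),
    )
    table = DynamicTable(name="TestTable", description="", id=list(range(3)), columns=[column])

    with ZarrIO(path="parallel_example.zarr", manager=get_manager(), mode="w") as io:
        io.write(
            container=table,
            number_of_jobs=2,
            max_threads_per_process=2,
            multiprocessing_context="spawn",  # "fork" is only available on UNIX
        )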
@@ -237,6 +302,15 @@ def export(self, **kwargs): src_io = getargs('src_io', kwargs) write_args, cache_spec = popargs('write_args', 'cache_spec', kwargs) + number_of_jobs, max_threads_per_process, multiprocessing_context = popargs( + "number_of_jobs", "max_threads_per_process", "multiprocessing_context", kwargs + ) + + self.__dci_queue = ZarrIODataChunkIteratorQueue( + number_of_jobs=number_of_jobs, + max_threads_per_process=max_threads_per_process, + multiprocessing_context=multiprocessing_context, + ) if not isinstance(src_io, ZarrIO) and write_args.get('link_data', True): raise UnsupportedOperation("Cannot export from non-Zarr backend %s to Zarr with write argument " @@ -286,36 +360,53 @@ def get_builder_disk_path(self, **kwargs): builder_path = os.path.join(basepath, self.__get_path(builder).lstrip("/")) return builder_path - @docval({'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder object representing the NWBFile'}, - {'name': 'link_data', 'type': bool, - 'doc': 'If not specified otherwise link (True) or copy (False) Zarr Datasets', 'default': True}, - {'name': 'exhaust_dci', 'type': bool, - 'doc': 'exhaust DataChunkIterators one at a time. If False, add ' + - 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', - 'default': True}, - {'name': 'export_source', 'type': str, - 'doc': 'The source of the builders when exporting', 'default': None}) + @docval( + {'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder object representing the NWBFile'}, + { + 'name': 'link_data', + 'type': bool, + 'doc': 'If not specified otherwise link (True) or copy (False) Zarr Datasets', + 'default': True + }, + { + 'name': 'exhaust_dci', + 'type': bool, + 'doc': ( + 'Exhaust DataChunkIterators one at a time. 
If False, add ' + 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end' + ), + 'default': True, + }, + { + 'name': 'export_source', + 'type': str, + 'doc': 'The source of the builders when exporting', + 'default': None, + }, + ) def write_builder(self, **kwargs): - """Write a builder to disk""" - f_builder, link_data, exhaust_dci, export_source = getargs('builder', - 'link_data', - 'exhaust_dci', - 'export_source', - kwargs) + """Write a builder to disk.""" + f_builder, link_data, exhaust_dci, export_source = getargs( + 'builder', 'link_data', 'exhaust_dci', 'export_source', kwargs + ) for name, gbldr in f_builder.groups.items(): - self.write_group(parent=self.__file, - builder=gbldr, - link_data=link_data, - exhaust_dci=exhaust_dci, - export_source=export_source) + self.write_group( + parent=self.__file, + builder=gbldr, + link_data=link_data, + exhaust_dci=exhaust_dci, + export_source=export_source, + ) for name, dbldr in f_builder.datasets.items(): - self.write_dataset(parent=self.__file, - builder=dbldr, - link_data=link_data, - exhaust_dci=exhaust_dci, - export_source=export_source) + self.write_dataset( + parent=self.__file, + builder=dbldr, + link_data=link_data, + exhaust_dci=exhaust_dci, + export_source=export_source, + ) self.write_attributes(self.__file, f_builder.attributes) # the same as set_attributes in HDMF - self.__dci_queue.exhaust_queue() # Write all DataChunkIterators that have been queued + self.__dci_queue.exhaust_queue() # Write any remaining DataChunkIterators that have been queued self._written_builders.set_written(f_builder) self.logger.debug("Done writing %s '%s' to path '%s'" % (f_builder.__class__.__qualname__, f_builder.name, self.source)) @@ -333,12 +424,10 @@ def write_builder(self, **kwargs): returns='the Group that was created', rtype='Group') def write_group(self, **kwargs): """Write a GroupBuider to file""" - parent, builder, link_data, exhaust_dci, export_source = getargs('parent', - 'builder', - 'link_data', - 'exhaust_dci', - 'export_source', - kwargs) + parent, builder, link_data, exhaust_dci, export_source = getargs( + 'parent', 'builder', 'link_data', 'exhaust_dci', 'export_source', kwargs + ) + if self.get_written(builder): group = parent[builder.name] else: @@ -347,19 +436,23 @@ def write_group(self, **kwargs): subgroups = builder.groups if subgroups: for subgroup_name, sub_builder in subgroups.items(): - self.write_group(parent=group, - builder=sub_builder, - link_data=link_data, - exhaust_dci=exhaust_dci) + self.write_group( + parent=group, + builder=sub_builder, + link_data=link_data, + exhaust_dci=exhaust_dci, + ) datasets = builder.datasets if datasets: for dset_name, sub_builder in datasets.items(): - self.write_dataset(parent=group, - builder=sub_builder, - link_data=link_data, - exhaust_dci=exhaust_dci, - export_source=export_source) + self.write_dataset( + parent=group, + builder=sub_builder, + link_data=link_data, + exhaust_dci=exhaust_dci, + export_source=export_source, + ) # write all links (haven implemented) links = builder.links @@ -379,10 +472,9 @@ def write_group(self, **kwargs): {'name': 'export_source', 'type': str, 'doc': 'The source of the builders when exporting', 'default': None}) def write_attributes(self, **kwargs): - """ - Set (i.e., write) the attributes on a given Zarr Group or Array - """ + """Set (i.e., write) the attributes on a given Zarr Group or Array.""" obj, attributes, export_source = getargs('obj', 'attributes', 'export_source', kwargs) + for key, value in attributes.items(): 
# Case 1: list, set, tuple type attributes if isinstance(value, (set, list, tuple)) or (isinstance(value, np.ndarray) and np.ndim(value) != 0): @@ -723,13 +815,15 @@ def __setup_chunked_dataset__(cls, parent, name, data, options=None): 'doc': 'The source of the builders when exporting', 'default': None}, returns='the Zarr array that was created', rtype=Array) def write_dataset(self, **kwargs): # noqa: C901 - parent, builder, link_data, exhaust_dci, export_source = getargs('parent', - 'builder', - 'link_data', - 'exhaust_dci', - 'export_source', - kwargs) + parent, builder, link_data, exhaust_dci, export_source = getargs( + 'parent', 'builder', 'link_data', 'exhaust_dci', 'export_source', kwargs + ) + force_data = getargs('force_data', kwargs) + + if exhaust_dci and self.__dci_queue is None: + self.__dci_queue = ZarrIODataChunkIteratorQueue() + if self.get_written(builder): return None name = builder.name diff --git a/src/hdmf_zarr/utils.py b/src/hdmf_zarr/utils.py index 9c23aba5..c584451c 100644 --- a/src/hdmf_zarr/utils.py +++ b/src/hdmf_zarr/utils.py @@ -1,20 +1,36 @@ -"""Collection of utility I/O classes for the ZarrIO backend store""" -from zarr.hierarchy import Group -import zarr -import numcodecs -import numpy as np +"""Collection of utility I/O classes for the ZarrIO backend store.""" +import gc +import traceback +import multiprocessing +import math +import json +import logging from collections import deque from collections.abc import Iterable +from typing import Optional, Union, Literal, Tuple, Dict, Any +from concurrent.futures import ProcessPoolExecutor +from threadpoolctl import threadpool_limits +from warnings import warn -import json -import logging +import numcodecs +import zarr +import numpy as np +from zarr.hierarchy import Group + +from hdmf.data_utils import DataIO, GenericDataChunkIterator, DataChunkIterator, AbstractDataChunkIterator +from hdmf.query import HDMFDataset +from hdmf.utils import docval, getargs -from hdmf.data_utils import DataIO -from hdmf.utils import (docval, - getargs) +from hdmf.spec import SpecWriter, SpecReader -from hdmf.spec import (SpecWriter, - SpecReader) + +# Necessary definitions to avoid parallelization bugs, Inherited from SpikeInterface experience +# see +# https://stackoverflow.com/questions/10117073/how-to-use-initializer-to-set-up-my-multiprocess-pool +# the tricks is : theses 2 variables are global per worker +# so they are not share in the same process +global _worker_context +global _operation_to_run class ZarrIODataChunkIteratorQueue(deque): @@ -22,18 +38,37 @@ class ZarrIODataChunkIteratorQueue(deque): Helper class used by ZarrIO to manage the write for DataChunkIterators Each queue element must be a tupple of two elements: 1) the dataset to write to and 2) the AbstractDataChunkIterator with the data + :param number_of_jobs: The number of jobs used to write the datasets. The default is 1. + :type number_of_jobs: integer + :param max_threads_per_process: Limits the number of threads used by each process. The default is None (no limits). + :type max_threads_per_process: integer or None + :param multiprocessing_context: Context for multiprocessing. It can be None (default), "fork" or "spawn". + Note that "fork" is only available on UNIX systems (not Windows). 
+ :type multiprocessing_context: string or None """ - def __init__(self): + def __init__( + self, + number_of_jobs: int = 1, + max_threads_per_process: Union[None, int] = None, + multiprocessing_context: Union[None, Literal["fork", "spawn"]] = None, + ): self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) + + self.number_of_jobs = number_of_jobs + self.max_threads_per_process = max_threads_per_process + self.multiprocessing_context = multiprocessing_context + super().__init__() @classmethod - def __write_chunk__(cls, dset, data): + def __write_chunk__(cls, dset: HDMFDataset, data: DataChunkIterator): """ Internal helper function used to read a chunk from the given DataChunkIterator and write it to the given Dataset - :param dset: The Dataset to write to - :param data: The DataChunkIterator to read from + :param dset: The Dataset to write to. + :type dset: HDMFDataset + :param data: The DataChunkIterator to read from. + :type data: DataChunkIterator :return: True of a chunk was written, False otherwise :rtype: bool """ @@ -63,19 +98,126 @@ def __write_chunk__(cls, dset, data): # Write the data dset[chunk_i.selection] = chunk_i.data # Chunk written and we need to continue + return True def exhaust_queue(self): """ - Read and write from any queued DataChunkIterators in a round-robin fashion + Read and write from any queued DataChunkIterators. + + Operates in a round-robin fashion for a single job. + Operates on a single dataset at a time with multiple jobs. """ - # Iterate through our queue and write data chunks in a round-robin fashion until all iterators are exhausted - self.logger.debug("Exhausting DataChunkIterator from queue (length %d)" % len(self)) + self.logger.debug(f"Exhausting DataChunkIterator from queue (length {len(self)})") + + if self.number_of_jobs > 1: + parallelizable_iterators = list() + buffer_map = list() + size_in_MB_per_iteration = list() + + display_progress = False + r_bar_in_MB = ( + "| {n_fmt}/{total_fmt} MB [Elapsed: {elapsed}, " + "Remaining: {remaining}, Rate:{rate_fmt}{postfix}]" + ) + bar_format = "{l_bar}{bar}" + f"{r_bar_in_MB}" + progress_bar_options = dict( + desc=f"Writing Zarr datasets with {self.number_of_jobs} jobs", + position=0, + bar_format=bar_format, + unit="MB", + ) + for (zarr_dataset, iterator) in iter(self): + # Parallel write only works well with GenericDataChunkIterators + # Due to perfect alignment between chunks and buffers + if not isinstance(iterator, GenericDataChunkIterator): + continue + + # Iterator must be pickleable as well, to be sent across jobs + is_iterator_pickleable, reason = self._is_pickleable(iterator=iterator) + if not is_iterator_pickleable: + self.logger.debug( + f"Dataset {zarr_dataset.path} was not pickleable during parallel write.\n\nReason: {reason}" + ) + continue + + # Add this entry to a running list to remove after initial pass (cannot mutate during iteration) + parallelizable_iterators.append((zarr_dataset, iterator)) + + # Disable progress at the iterator level and aggregate enable option + display_progress = display_progress or iterator.display_progress + iterator.display_progress = False + per_iterator_progress_options = { + key: value for key, value in iterator.progress_bar_options.items() + if key not in ["desc", "total", "file"] + } + progress_bar_options.update(**per_iterator_progress_options) + + iterator_itemsize = iterator.dtype.itemsize + for buffer_selection in iterator.buffer_selection_generator: + buffer_map_args = (zarr_dataset.store.path, 
zarr_dataset.path, iterator, buffer_selection) + buffer_map.append(buffer_map_args) + buffer_size_in_MB = math.prod( + [slice_.stop - slice_.start for slice_ in buffer_selection] + ) * iterator_itemsize / 1e6 + size_in_MB_per_iteration.append(buffer_size_in_MB) + progress_bar_options.update( + total=int(sum(size_in_MB_per_iteration)), # int() to round down to nearest integer for better display + ) + + if parallelizable_iterators: # Avoid spinning up ProcessPool if no candidates during this exhaustion + # Remove candidates for parallelization from the queue + for (zarr_dataset, iterator) in parallelizable_iterators: + self.remove((zarr_dataset, iterator)) + + operation_to_run = self._write_buffer_zarr + process_initialization = dict + initialization_arguments = () + with ProcessPoolExecutor( + max_workers=self.number_of_jobs, + initializer=self.initializer_wrapper, + mp_context=multiprocessing.get_context(method=self.multiprocessing_context), + initargs=( + operation_to_run, + process_initialization, + initialization_arguments, + self.max_threads_per_process + ), + ) as executor: + results = executor.map(self.function_wrapper, buffer_map) + + if display_progress: + try: # Import warnings are also issued at the level of the iterator instantiation + from tqdm import tqdm + + results = tqdm(iterable=results, **progress_bar_options) + + # exector map must be iterated to deploy commands over jobs + for size_in_MB, result in zip(size_in_MB_per_iteration, results): + results.update(n=int(size_in_MB)) # int() to round down for better display + except Exception as exception: # pragma: no cover + warn( + message=( + "Unable to setup progress bar due to" + f"\n{type(exception)}: {str(exception)}\n\n{traceback.format_exc()}" + ), + stacklevel=2, + ) + # exector map must be iterated to deploy commands over jobs + for result in results: + pass + else: + # exector map must be iterated to deploy commands over jobs + for result in results: + pass + + # Iterate through remaining queue and write DataChunks in a round-robin fashion until exhausted while len(self) > 0: - dset, data = self.popleft() - if self.__write_chunk__(dset, data): - self.append(dataset=dset, data=data) - self.logger.debug("Exhausted DataChunkIterator from queue (length %d)" % len(self)) + zarr_dataset, iterator = self.popleft() + if self.__write_chunk__(zarr_dataset, iterator): + self.append(dataset=zarr_dataset, data=iterator) + + self.logger.debug(f"Exhausted DataChunkIterator from queue (length {len(self)})") def append(self, dataset, data): """ @@ -87,6 +229,108 @@ def append(self, dataset, data): """ super().append((dataset, data)) + @staticmethod + def _is_pickleable(iterator: AbstractDataChunkIterator) -> Tuple[bool, Optional[str]]: + """ + Determine if the iterator can be pickled. + + Returns both the bool and the reason if False. + """ + try: + dictionary = iterator._to_dict() + iterator._from_dict(dictionary=dictionary) + + return True, None + except Exception as exception: + base_hdmf_not_implemented_messages = ( + "The `._to_dict()` method for pickling has not been defined for this DataChunkIterator!", + "The `._from_dict()` method for pickling has not been defined for this DataChunkIterator!", + ) + + if isinstance(exception, NotImplementedError) and str(exception) in base_hdmf_not_implemented_messages: + reason = "The pickling methods for the iterator have not been defined." 
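+                # Custom GenericDataChunkIterator subclasses opt in to parallel write by
+                # implementing both `_to_dict()` and `_from_dict()`; see the pickle-able
+                # iterator in tests/unit/test_parallel_write.py for an example.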
+ else: + reason = ( + f"The pickling methods for the iterator have been defined but throw the error:\n\n" + f"{type(exception)}: {str(exception)}\n\nwith traceback\n\n{traceback.format_exc()}," + ) + + return False, reason + + @staticmethod + def initializer_wrapper( + operation_to_run: callable, + process_initialization: callable, + initialization_arguments: Iterable, # TODO: eventually standardize with typing.Iterable[typing.Any] + max_threads_per_process: Optional[int] = None + ): # keyword arguments here are just for readability, ProcessPool only takes a tuple + """ + Needed as a part of a bug fix with cloud memory leaks discovered by SpikeInterface team. + + Recommended fix is to have global wrappers for the working initializer that limits the + threads used per process. + """ + global _worker_context + global _operation_to_run + + if max_threads_per_process is None: + _worker_context = process_initialization(*initialization_arguments) + else: + with threadpool_limits(limits=max_threads_per_process): + _worker_context = process_initialization(*initialization_arguments) + _worker_context["max_threads_per_process"] = max_threads_per_process + _operation_to_run = operation_to_run + + @staticmethod + def _write_buffer_zarr( + worker_context: Dict[str, Any], + zarr_store_path: str, + relative_dataset_path: str, + iterator: AbstractDataChunkIterator, + buffer_selection: Tuple[slice, ...], + ): + # TODO, figure out propagation of storage options + zarr_store = zarr.open(store=zarr_store_path, mode="r+") # storage_options=storage_options) + zarr_dataset = zarr_store[relative_dataset_path] + + data = iterator._get_data(selection=buffer_selection) + zarr_dataset[buffer_selection] = data + + # An issue detected in cloud usage by the SpikeInterface team + # Fix memory leak by forcing garbage collection + del data + gc.collect() + + @staticmethod + def function_wrapper(args: Tuple[str, str, AbstractDataChunkIterator, Tuple[slice, ...]]): + """ + Needed as a part of a bug fix with cloud memory leaks discovered by SpikeInterface team. + + Recommended fix is to have a global wrapper for the executor.map level. 
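+
+        Each ``args`` tuple is ``(zarr_store_path, relative_dataset_path, iterator, buffer_selection)``,
+        matching the entries that ``exhaust_queue`` collects into ``buffer_map``.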
+ """ + zarr_store_path, relative_dataset_path, iterator, buffer_selection = args + global _worker_context + global _operation_to_run + + max_threads_per_process = _worker_context["max_threads_per_process"] + if max_threads_per_process is None: + return _operation_to_run( + _worker_context, + zarr_store_path, + relative_dataset_path, + iterator, + buffer_selection + ) + else: + with threadpool_limits(limits=max_threads_per_process): + return _operation_to_run( + _worker_context, + zarr_store_path, + relative_dataset_path, + iterator, + buffer_selection, + ) + class ZarrSpecWriter(SpecWriter): """ diff --git a/tests/unit/test_parallel_write.py b/tests/unit/test_parallel_write.py new file mode 100644 index 00000000..61aae7ab --- /dev/null +++ b/tests/unit/test_parallel_write.py @@ -0,0 +1,267 @@ +"""Module for testing the parallel write feature for the ZarrIO.""" +import unittest +import platform +from typing import Tuple, Dict +from io import StringIO +from unittest.mock import patch + +import numpy as np +from numpy.testing import assert_array_equal +from hdmf_zarr import ZarrIO +from hdmf.common import DynamicTable, VectorData, get_manager +from hdmf.data_utils import GenericDataChunkIterator, DataChunkIterator + +try: + import tqdm # noqa: F401 + TQDM_INSTALLED = True +except ImportError: + TQDM_INSTALLED = False + + +class PickleableDataChunkIterator(GenericDataChunkIterator): + """Generic data chunk iterator used for specific testing purposes.""" + + def __init__(self, data, **base_kwargs): + self.data = data + + self._base_kwargs = base_kwargs + super().__init__(**base_kwargs) + + def _get_dtype(self) -> np.dtype: + return self.data.dtype + + def _get_maxshape(self) -> tuple: + return self.data.shape + + def _get_data(self, selection: Tuple[slice]) -> np.ndarray: + return self.data[selection] + + def __reduce__(self): + instance_constructor = self._from_dict + initialization_args = (self._to_dict(),) + return (instance_constructor, initialization_args) + + def _to_dict(self) -> Dict: + dictionary = dict() + # Note this is not a recommended way to pickle contents + # ~~ Used for testing purposes only ~~ + dictionary["data"] = self.data + dictionary["base_kwargs"] = self._base_kwargs + + return dictionary + + @staticmethod + def _from_dict(dictionary: dict) -> GenericDataChunkIterator: # TODO: need to investigate the need of base path + data = dictionary["data"] + + iterator = PickleableDataChunkIterator(data=data, **dictionary["base_kwargs"]) + return iterator + + +class NotPickleableDataChunkIterator(GenericDataChunkIterator): + """Generic data chunk iterator used for specific testing purposes.""" + + def __init__(self, data, **base_kwargs): + self.data = data + + self._base_kwargs = base_kwargs + super().__init__(**base_kwargs) + + def _get_dtype(self) -> np.dtype: + return self.data.dtype + + def _get_maxshape(self) -> tuple: + return self.data.shape + + def _get_data(self, selection: Tuple[slice]) -> np.ndarray: + return self.data[selection] + + +def test_parallel_write(tmpdir): + number_of_jobs = 2 + data = np.array([1., 2., 3.]) + column = VectorData(name="TestColumn", description="", data=PickleableDataChunkIterator(data=data)) + dynamic_table = DynamicTable(name="TestTable", description="", id=list(range(3)), columns=[column]) + + zarr_top_level_path = str(tmpdir / "test_parallel_write.zarr") + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + io.write(container=dynamic_table, number_of_jobs=number_of_jobs) + + with ZarrIO(path=zarr_top_level_path, 
manager=get_manager(), mode="r") as io: + dynamic_table_roundtrip = io.read() + data_roundtrip = dynamic_table_roundtrip["TestColumn"].data + assert_array_equal(data_roundtrip, data) + + +def test_mixed_iterator_types(tmpdir): + number_of_jobs = 2 + + generic_iterator_data = np.array([1., 2., 3.]) + generic_iterator_column = VectorData( + name="TestGenericIteratorColumn", + description="", + data=PickleableDataChunkIterator(data=generic_iterator_data) + ) + + classic_iterator_data = np.array([4., 5., 6.]) + classic_iterator_column = VectorData( + name="TestClassicIteratorColumn", + description="", + data=DataChunkIterator(data=classic_iterator_data) + ) + + unwrappped_data = np.array([7., 8., 9.]) + unwrapped_column = VectorData(name="TestUnwrappedColumn", description="", data=unwrappped_data) + dynamic_table = DynamicTable( + name="TestTable", + description="", + id=list(range(3)), + columns=[generic_iterator_column, classic_iterator_column, unwrapped_column], + ) + + zarr_top_level_path = str(tmpdir / "test_mixed_iterator_types.zarr") + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + io.write(container=dynamic_table, number_of_jobs=number_of_jobs) + + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="r") as io: + dynamic_table_roundtrip = io.read() + generic_iterator_data_roundtrip = dynamic_table_roundtrip["TestGenericIteratorColumn"].data + assert_array_equal(generic_iterator_data_roundtrip, generic_iterator_data) + + classic_iterator_data_roundtrip = dynamic_table_roundtrip["TestClassicIteratorColumn"].data + assert_array_equal(classic_iterator_data_roundtrip, classic_iterator_data) + + generic_iterator_data_roundtrip = dynamic_table_roundtrip["TestUnwrappedColumn"].data + assert_array_equal(generic_iterator_data_roundtrip, unwrappped_data) + + +def test_mixed_iterator_pickleability(tmpdir): + number_of_jobs = 2 + + pickleable_iterator_data = np.array([1., 2., 3.]) + pickleable_iterator_column = VectorData( + name="TestGenericIteratorColumn", + description="", + data=PickleableDataChunkIterator(data=pickleable_iterator_data) + ) + + not_pickleable_iterator_data = np.array([4., 5., 6.]) + not_pickleable_iterator_column = VectorData( + name="TestClassicIteratorColumn", + description="", + data=NotPickleableDataChunkIterator(data=not_pickleable_iterator_data) + ) + + dynamic_table = DynamicTable( + name="TestTable", + description="", + id=list(range(3)), + columns=[pickleable_iterator_column, not_pickleable_iterator_column], + ) + + zarr_top_level_path = str(tmpdir / "test_mixed_iterator_pickleability.zarr") + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + io.write(container=dynamic_table, number_of_jobs=number_of_jobs) + + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="r") as io: + dynamic_table_roundtrip = io.read() + + pickleable_iterator_data_roundtrip = dynamic_table_roundtrip["TestGenericIteratorColumn"].data + assert_array_equal(pickleable_iterator_data_roundtrip, pickleable_iterator_data) + + not_pickleable_iterator_data_roundtrip = dynamic_table_roundtrip["TestClassicIteratorColumn"].data + assert_array_equal(not_pickleable_iterator_data_roundtrip, not_pickleable_iterator_data) + + +@unittest.skipIf(not TQDM_INSTALLED, "optional tqdm module is not installed") +def test_simple_tqdm(tmpdir): + number_of_jobs = 2 + expected_desc = f"Writing Zarr datasets with {number_of_jobs} jobs" + + zarr_top_level_path = str(tmpdir / "test_simple_tqdm.zarr") + with patch("sys.stderr", 
new=StringIO()) as tqdm_out:
+        with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io:
+            column = VectorData(
+                name="TestColumn",
+                description="",
+                data=PickleableDataChunkIterator(
+                    data=np.array([1., 2., 3.]),
+                    display_progress=True,
+                )
+            )
+            dynamic_table = DynamicTable(name="TestTable", description="", columns=[column])
+            io.write(container=dynamic_table, number_of_jobs=number_of_jobs)
+
+    assert expected_desc in tqdm_out.getvalue()
+
+
+@unittest.skipIf(not TQDM_INSTALLED, "optional tqdm module is not installed")
+def test_compound_tqdm(tmpdir):
+    number_of_jobs = 2
+    expected_desc_pickleable = f"Writing Zarr datasets with {number_of_jobs} jobs"
+    expected_desc_not_pickleable = "Writing non-parallel dataset..."
+
+    zarr_top_level_path = str(tmpdir / "test_compound_tqdm.zarr")
+    with patch("sys.stderr", new=StringIO()) as tqdm_out:
+        with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io:
+            pickleable_column = VectorData(
+                name="TestPickleableIteratorColumn",
+                description="",
+                data=PickleableDataChunkIterator(
+                    data=np.array([1., 2., 3.]),
+                    display_progress=True,
+                )
+            )
+            not_pickleable_column = VectorData(
+                name="TestNotPickleableColumn",
+                description="",
+                data=NotPickleableDataChunkIterator(
+                    data=np.array([4., 5., 6.]),
+                    display_progress=True,
+                    progress_bar_options=dict(desc=expected_desc_not_pickleable, position=1)
+                )
+            )
+            dynamic_table = DynamicTable(
+                name="TestTable", description="", columns=[pickleable_column, not_pickleable_column]
+            )
+            io.write(container=dynamic_table, number_of_jobs=number_of_jobs)
+
+    tqdm_out_value = tqdm_out.getvalue()
+    assert expected_desc_pickleable in tqdm_out_value
+    assert expected_desc_not_pickleable in tqdm_out_value
+
+
+def test_extra_keyword_argument_propagation(tmpdir):
+    number_of_jobs = 2
+
+    column = VectorData(name="TestColumn", description="", data=np.array([1., 2., 3.]))
+    dynamic_table = DynamicTable(name="TestTable", description="", id=list(range(3)), columns=[column])
+
+    zarr_top_level_path = str(tmpdir / "test_extra_parallel_write_keyword_arguments.zarr")
+
+    test_keyword_argument_pairs = [
+        dict(max_threads_per_process=2, multiprocessing_context=None),
+        dict(max_threads_per_process=None, multiprocessing_context="spawn"),
+        dict(max_threads_per_process=2, multiprocessing_context="spawn"),
+    ]
+    if platform.system() != "Windows":
+        test_keyword_argument_pairs.extend(
+            [
+                dict(max_threads_per_process=None, multiprocessing_context="fork"),
+                dict(max_threads_per_process=2, multiprocessing_context="fork"),
+            ]
+        )
+
+    for test_keyword_argument_pair in test_keyword_argument_pairs:
+        test_max_threads_per_process = test_keyword_argument_pair["max_threads_per_process"]
+        test_multiprocessing_context = test_keyword_argument_pair["multiprocessing_context"]
+        with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io:
+            io.write(
+                container=dynamic_table,
+                number_of_jobs=number_of_jobs,
+                max_threads_per_process=test_max_threads_per_process,
+                multiprocessing_context=test_multiprocessing_context
+            )
+
+            assert io._ZarrIO__dci_queue.max_threads_per_process == test_max_threads_per_process
+            assert io._ZarrIO__dci_queue.multiprocessing_context == test_multiprocessing_context
diff --git a/tox.ini b/tox.ini
index 6934d6e4..720a97f5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -38,7 +38,7 @@
 install_command =
     python -m pip install {opts} {packages}
 deps =
     {[testenv]deps}
-    # -rrequirements-opt.txt
+    -rrequirements-opt.txt
 commands = {[testenv]commands}
 
 # Test with 
python 3.11; pinned dev and optional reqs; upgraded run reqs @@ -48,7 +48,7 @@ install_command = python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt - # -rrequirements-opt.txt + -rrequirements-opt.txt commands = {[testenv]commands} # Test with python 3.11; pinned dev and optional reqs; upgraded, pre-release run reqs @@ -58,7 +58,7 @@ install_command = python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt - # -rrequirements-opt.txt + -rrequirements-opt.txt commands = {[testenv]commands} # Test with python 3.8; pinned dev reqs; minimum run reqs @@ -95,7 +95,7 @@ commands = {[testenv:build]commands} basepython = python3.11 deps = {[testenv]deps} - # -rrequirements-opt.txt + -rrequirements-opt.txt commands = {[testenv:build]commands} [testenv:build-py311-upgraded] @@ -104,7 +104,7 @@ install_command = python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt - # -rrequirements-opt.txt + -rrequirements-opt.txt commands = {[testenv:build]commands} [testenv:build-py311-prerelease] @@ -113,7 +113,7 @@ install_command = python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt - # -rrequirements-opt.txt + -rrequirements-opt.txt commands = {[testenv:build]commands} [testenv:build-py38-minimum] @@ -169,7 +169,7 @@ install_command = deps = -rrequirements-dev.txt -rrequirements-doc.txt - # -rrequirements-opt.txt + -rrequirements-opt.txt commands = {[testenv:gallery]commands} # Test with python 3.11; pinned dev, doc, and optional reqs; pre-release run reqs @@ -180,7 +180,7 @@ install_command = deps = -rrequirements-dev.txt -rrequirements-doc.txt - # -rrequirements-opt.txt + -rrequirements-opt.txt commands = {[testenv:gallery]commands} # Test with python 3.8; pinned dev and doc reqs; minimum run reqs @@ -190,4 +190,5 @@ deps = -rrequirements-dev.txt -rrequirements-min.txt -rrequirements-doc.txt + -rrequirements-opt.txt commands = {[testenv:gallery]commands}
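
For orientation, a minimal usage sketch of the parallel write path that the new tests exercise. It mirrors the io.write() call in test_extra_keyword_argument_propagation above; the output path "example.zarr" is illustrative only, and the note about serial fallback for plain arrays is inferred from the mixed-iterator and tqdm tests rather than stated by the patch itself.

    import numpy as np

    from hdmf.common import DynamicTable, VectorData, get_manager
    from hdmf_zarr import ZarrIO

    # Mirrors the table construction used in the tests above. A plain array like
    # this is written directly; to hand dataset writes to the worker pool, the
    # data would instead be wrapped in a pickleable GenericDataChunkIterator
    # subclass such as the tests' PickleableDataChunkIterator.
    column = VectorData(name="TestColumn", description="", data=np.array([1., 2., 3.]))
    table = DynamicTable(name="TestTable", description="", id=list(range(3)), columns=[column])

    with ZarrIO(path="example.zarr", manager=get_manager(), mode="w") as io:
        io.write(
            container=table,
            number_of_jobs=2,                 # processes used for parallel dataset writes
            max_threads_per_process=2,        # optional per-worker thread cap (applied via threadpool_limits)
            multiprocessing_context="spawn",  # optional start method, forwarded to the dataset write queue
        )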