From ad2b0d66389903d2e0401b06f72c89fbb96b6f6a Mon Sep 17 00:00:00 2001 From: Matthew Avaylon Date: Thu, 21 Nov 2024 16:12:28 -0800 Subject: [PATCH 01/23] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a0845ed..fbff99b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF-ZARR Changelog -## 1.0.0 (Upcoming) +## 1.0.0 (November 21, 2024) ### Enhancements * Added initial refactor of export, supporting references and internal/external links from Zarr to Zarr. This will introduce breaking changes that could lead to existing exported files to be invalid. This update removes '.' as the object default file source. @mavaylon1 [#194](https://github.com/hdmf-dev/hdmf-zarr/pull/194) From ed2f1a6701a90b4bbb4e4d2ce388ac3e8bbd5cf4 Mon Sep 17 00:00:00 2001 From: rly Date: Wed, 27 Nov 2024 06:47:37 -0800 Subject: [PATCH 02/23] Update requirements files --- requirements-dev.txt | 14 +++++++------- requirements-opt.txt | 2 +- requirements.txt | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 75e9ba1c..07bfd50f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,13 +3,13 @@ # compute coverage, and create test environments. note that depending on the version of python installed, different # versions of requirements may be installed due to package incompatibilities. # -black==24.3.0 -codespell==2.2.6 -coverage==7.6.7 -hdf5plugin==4.3.0 # hdf5plugin is used to test conversion of plugin filters -pre-commit==3.5.0 +black==24.10.0 +codespell==2.3.0 +coverage==7.6.8 +hdf5plugin==5.0.0 # hdf5plugin is used to test conversion of plugin filters +pre-commit==4.0.1 pytest==8.3.3 pytest-cov==6.0.0 python-dateutil==2.8.2 -ruff==0.1.3 -tox==4.11.3 +ruff==0.8.0 +tox==4.23.2 diff --git a/requirements-opt.txt b/requirements-opt.txt index 6fc3bcb0..823b532a 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,3 +1,3 @@ -tqdm==4.67.0 +tqdm==4.67.1 fsspec==2024.10.0 s3fs==2024.10.0 diff --git a/requirements.txt b/requirements.txt index 57647cf2..fbbca0b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,5 @@ zarr==2.18.2; python_version < "3.10" pynwb==2.8.3 numpy==2.1.3; python_version >= "3.10" # numpy 2.1.0 dropped support for python 3.9 numpy==2.0.2; python_version < "3.10" -numcodecs==0.13.1 +numcodecs==0.14.1 threadpoolctl==3.5.0 From 82930a18b34336436e8a726d33735e3aeeef8429 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 06:54:08 -0800 Subject: [PATCH 03/23] Update test for updated hdf5plugin --- tests/unit/test_zarrdataio.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_zarrdataio.py b/tests/unit/test_zarrdataio.py index d6070784..2f820fdf 100644 --- a/tests/unit/test_zarrdataio.py +++ b/tests/unit/test_zarrdataio.py @@ -47,7 +47,7 @@ def test_hdf5_to_zarr_filters_scaleoffset(self): msg = "/test_dset HDF5 scaleoffset filter ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset) - self.assertEqual(len(filters), 0) + self.assertEqual(len(filters), 0) # Close the HDF5 file h5file.close() @@ -60,7 +60,7 @@ def test_hdf5_to_zarr_filters_lzf(self): msg = "/test_dset HDF5 szip or lzf compression ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset) - self.assertEqual(len(filters), 0) + self.assertEqual(len(filters), 0) # Close the HDF5 file 
h5file.close() @@ -76,7 +76,7 @@ def test_hdf5_to_zarr_filters_lz4(self): msg = "/test_dset HDF5 lz4 compression ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset) - self.assertEqual(len(filters), 0) + self.assertEqual(len(filters), 0) # Close the HDF5 file h5file.close() @@ -92,7 +92,7 @@ def test_hdf5_to_zarr_filters_bitshuffle(self): msg = "/test_dset HDF5 bitshuffle compression ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset) - self.assertEqual(len(filters), 0) + self.assertEqual(len(filters), 0) # Close the HDF5 file h5file.close() @@ -109,10 +109,10 @@ def test_hdf5_to_zarr_filters_other_unsupported(self): data=[1, 2, 3, 4, 5], **hdf5plugin.FciDecomp()) # test that we warn due to the FciDecomp - msg = "/test_fcidecomp HDF5 filter id 32018 with properties None ignored in Zarr." + msg = r"/test_fcidecomp HDF5 filter id 32018 with properties .* ignored in Zarr." with self.assertWarnsWith(UserWarning, msg): filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset_FciDecomp) - self.assertEqual(len(filters), 0) + self.assertEqual(len(filters), 0) # Close the HDF5 file h5file.close() From 744cdb5a3c01a69e1287c4ca3513522d6387622c Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 07:06:54 -0800 Subject: [PATCH 04/23] Fix test --- tests/unit/test_zarrdataio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_zarrdataio.py b/tests/unit/test_zarrdataio.py index 2f820fdf..f52f2f5c 100644 --- a/tests/unit/test_zarrdataio.py +++ b/tests/unit/test_zarrdataio.py @@ -110,7 +110,7 @@ def test_hdf5_to_zarr_filters_other_unsupported(self): **hdf5plugin.FciDecomp()) # test that we warn due to the FciDecomp msg = r"/test_fcidecomp HDF5 filter id 32018 with properties .* ignored in Zarr." - with self.assertWarnsWith(UserWarning, msg): + with self.assertWarnsRegex(UserWarning, msg): filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset_FciDecomp) self.assertEqual(len(filters), 0) # Close the HDF5 file From 1e5985e46f7849a1d38b97428dcd17b7122e0e71 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 07:27:20 -0800 Subject: [PATCH 05/23] Update release instructions --- .github/PULL_REQUEST_TEMPLATE/release.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index e49d144d..0073225e 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -5,7 +5,7 @@ Prepare for release of HDMF-Zarr [version] - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, `requirements-doc.txt`, and `requirements-opt.txt` to the latest versions, and update dependency ranges in `pyproject.toml` and minimums in `requirements-min.txt` as needed. - Run `pip install pur && pur -r requirements-dev.txt -r requirements.txt -r requirements-opt.txt`. + Run `pip install pur && pur -r -d [requirements file]` to see which packages can be updated. 
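For context on the test updates in patches 03 and 04 above: `ZarrDataIO.hdf5_to_zarr_filters` inspects an `h5py` dataset and returns a list of numcodecs filters that Zarr can reuse, warning about and skipping filters it cannot translate (scaleoffset, szip/lzf, and unknown plugin filters). A minimal sketch of that conversion path follows; the HDF5 file name is hypothetical, and the `ZarrDataIO` keyword arguments shown (`data`, `chunks`, `filters`) are illustrative of its usual options rather than code taken from this PR:

    # Translate an HDF5 dataset's filter pipeline into numcodecs filters for Zarr.
    import h5py
    from hdmf_zarr import ZarrDataIO

    with h5py.File("example.h5", "r") as h5file:      # hypothetical input file
        h5dset = h5file["test_dset"]                  # dataset name borrowed from the tests above
        # Unsupported HDF5 filters are skipped and reported via UserWarning.
        filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset)
        # Reuse the translated filters when wrapping the data for a Zarr write.
        wrapped = ZarrDataIO(data=h5dset[:], chunks=h5dset.chunks, filters=filters)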
- [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed - [ ] Update `pyproject.toml` as needed From 9fe07c2d00c742d2e741ab60a7c977a9e1d15644 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 07:27:26 -0800 Subject: [PATCH 06/23] Update Sphinx workflow --- .github/workflows/check_external_links.yml | 12 +++++------- .gitignore | 1 + docs/source/conf.py | 1 - docs/source/index.rst | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index 2fadbb80..59773b8b 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -8,13 +8,10 @@ on: jobs: check-external-links: runs-on: ubuntu-latest + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - uses: actions/checkout@v4 with: submodules: 'recursive' @@ -29,5 +26,6 @@ jobs: python -m pip install --upgrade pip python -m pip install -r requirements-doc.txt -r requirements.txt -r requirements-opt.txt python -m pip install . + - name: Check Sphinx external links - run: sphinx-build -b linkcheck ./docs/source ./test_build + run: sphinx-build -W -b linkcheck ./docs/source ./test_build diff --git a/.gitignore b/.gitignore index 65aa7ee8..91d64df9 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,7 @@ docs/build/ docs/source/hdmf_zarr*.rst docs/source/gen_modules docs/source/tutorials +docs/source/sg_execution_times.rst # example data generated by the tutorials docs/gallery/*.h5 docs/gallery/*.zarr diff --git a/docs/source/conf.py b/docs/source/conf.py index d3bc2002..39669770 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -116,7 +116,6 @@ # a list of builtin themes. # html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] html_theme_options = { 'logo_only': False, # Only show the hdmf-zarr logo without the documentation title diff --git a/docs/source/index.rst b/docs/source/index.rst index 9656c3f7..f9910075 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,8 +6,8 @@ Welcome to hdmf-zarr's documentation! ===================================== -**hdmf_zarr** implements a Zarr backend for `HDMF `_ as well as -convenience classes for integration of Zarr with `PyNWB `_ to +**hdmf_zarr** implements a Zarr backend for `HDMF `_ as well as +convenience classes for integration of Zarr with `PyNWB `_ to support writing of NWB files to `Zarr `_. **Status:** The Zarr backend is **under development** and may still change. 
See the From 026d5b4df724ec01db6018736b58da58c96bd71b Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 07:43:09 -0800 Subject: [PATCH 07/23] Update workflows to remove cancel action and align with hdmf --- .github/workflows/HDMF_dev.yaml | 20 ++--- .github/workflows/check_external_links.yml | 5 +- .github/workflows/deploy_release.yml | 11 +-- .github/workflows/run_all_tests.yml | 63 +++++++------- .github/workflows/run_coverage.yml | 16 ++-- .github/workflows/run_tests.yml | 96 +++++++++++----------- 6 files changed, 102 insertions(+), 109 deletions(-) diff --git a/.github/workflows/HDMF_dev.yaml b/.github/workflows/HDMF_dev.yaml index b493b079..147c1d7c 100644 --- a/.github/workflows/HDMF_dev.yaml +++ b/.github/workflows/HDMF_dev.yaml @@ -5,18 +5,14 @@ on: [pull_request, workflow_dispatch] jobs: check_compatibility: runs-on: ubuntu-latest + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - fetch-depth: 0 # tags are required for versioneer to determine the version + - name: Checkout repo + uses: actions/checkout@v4 - - name: Set up Python 3.13 + - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.13" @@ -26,7 +22,7 @@ jobs: python -m pip install -r requirements-dev.txt -r requirements.txt pip install . - - name: Clone HDMF Dev Branch + - name: Clone and Install HDMF Dev Branch run: | git clone https://github.com/hdmf-dev/hdmf.git --recurse-submodules cd hdmf @@ -36,7 +32,7 @@ jobs: - name: Run HDMF_Zarr Tests run: - pytest -v tests + pytest -v diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index 59773b8b..18c85d73 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -12,9 +12,8 @@ jobs: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true steps: - - uses: actions/checkout@v4 - with: - submodules: 'recursive' + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index d8c4566c..0af20a40 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -9,16 +9,13 @@ jobs: name: Deploy release from tag runs-on: ubuntu-latest steps: - - name: Checkout repo with submodules + - name: Checkout repo uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' + python-version: '3.13' - name: Install build dependencies run: | @@ -28,11 +25,11 @@ jobs: - name: Run tox tests run: | - tox -e py312-upgraded + tox -e py313-upgraded - name: Build wheel and source distribution run: | - tox -e build-py312-upgraded + tox -e build-py313-upgraded ls -1 dist - name: Test installation from a wheel diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 92bf2fcc..c3272466 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -18,6 +18,9 @@ jobs: defaults: run: shell: bash + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} + cancel-in-progress: true strategy: fail-fast: false matrix: @@ 
-47,16 +50,8 @@ jobs: - { name: macos-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: macos-latest } - { name: macos-python3.13-prerelease , test-tox-env: py313-prerelease, build-tox-env: build-py313-prerelease, python-ver: "3.13", os: macos-latest } steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 @@ -68,22 +63,33 @@ jobs: python -m pip install --upgrade pip python -m pip install tox python -m pip list + - name: Run tox tests run: | tox -e ${{ matrix.test-tox-env }} + - name: Build wheel and source distribution run: | tox -e ${{ matrix.build-tox-env }} ls -1 dist + - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + run-all-gallery-tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} defaults: run: shell: bash + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} + cancel-in-progress: true strategy: fail-fast: false matrix: @@ -98,16 +104,8 @@ jobs: - { name: macos-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: macos-latest } - { name: macos-gallery-python3.13-prerelease , test-tox-env: gallery-py313-prerelease, python-ver: "3.13", os: macos-latest } steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 @@ -119,15 +117,20 @@ jobs: python -m pip install --upgrade pip python -m pip install tox python -m pip list + - name: Run tox tests run: | tox -e ${{ matrix.test-tox-env }} + run-all-tests-on-conda: name: ${{ matrix.name }} runs-on: ubuntu-latest defaults: run: shell: bash -l {0} # needed for conda environment to work + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} + cancel-in-progress: true strategy: fail-fast: false matrix: @@ -141,22 +144,15 @@ jobs: - { name: conda-linux-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } - { name: conda-linux-python3.13-prerelease, test-tox-env: py313-prerelease, build-tox-env: build-py313-prerelease, python-ver: "3.13", os: ubuntu-latest } steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python-ver }} + channels: conda-forge - name: Install build dependencies run: | @@ 
-170,6 +166,7 @@ jobs: conda config --show-sources conda list --show-channel-urls + # NOTE tox installs packages from PyPI not conda-forge... - name: Run tox tests run: | tox -e ${{ matrix.test-tox-env }} @@ -182,3 +179,7 @@ jobs: - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index 21150fc5..21a87cbf 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -19,6 +19,9 @@ jobs: defaults: run: shell: bash + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.opt_req }} + cancel-in-progress: true strategy: matrix: include: @@ -26,19 +29,12 @@ jobs: - { os: ubuntu-latest , opt_req: false } - { os: windows-latest, opt_req: false } - { os: macos-latest , opt_req: false } - env: + env: # used by codecov-action OS: ${{ matrix.os }} PYTHON: '3.13' steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 827606c0..707fb97d 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -15,6 +15,9 @@ jobs: defaults: run: shell: bash + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} + cancel-in-progress: true strategy: fail-fast: false matrix: @@ -28,16 +31,8 @@ jobs: - { name: macos-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: macos-latest } - { name: macos-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: macos-latest } steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 @@ -49,16 +44,24 @@ jobs: python -m pip install --upgrade pip python -m pip install tox python -m pip list + - name: Run tox tests run: | tox -e ${{ matrix.test-tox-env }} + - name: Build wheel and source distribution run: | tox -e ${{ matrix.build-tox-env }} ls -1 dist + - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + - name: Upload distribution as a workspace artifact if: ${{ matrix.upload-wheels }} uses: actions/upload-artifact@v4 @@ -72,24 +75,20 @@ jobs: defaults: run: shell: bash + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} + cancel-in-progress: true strategy: fail-fast: false matrix: include: - - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } - - { name: linux-gallery-python3.13-upgraded , test-tox-env: 
gallery-py313-upgraded, python-ver: "3.13", os: ubuntu-latest } - - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } - - { name: windows-gallery-python3.13-upgraded, test-tox-env: gallery-py313-upgraded, python-ver: "3.13", os: windows-latest } + - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } + - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: windows-latest } steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 @@ -101,15 +100,20 @@ jobs: python -m pip install --upgrade pip python -m pip install tox python -m pip list + - name: Run tox tests run: | tox -e ${{ matrix.test-tox-env }} + run-tests-on-conda: name: ${{ matrix.name }} runs-on: ubuntu-latest defaults: run: shell: bash -l {0} + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.name }} + cancel-in-progress: true strategy: fail-fast: false matrix: @@ -117,57 +121,57 @@ jobs: - { name: conda-linux-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } - { name: conda-linux-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version + - name: Checkout repo + uses: actions/checkout@v4 - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true - auto-activate-base: true - activate-environment: true python-version: ${{ matrix.python-ver }} + channels: conda-forge - name: Install build dependencies run: | conda config --set always_yes yes --set changeps1 no conda info conda install -c conda-forge tox - conda list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls + + # NOTE tox installs packages from PyPI not conda-forge... 
- name: Run tox tests run: | tox -e ${{ matrix.test-tox-env }} + - name: Build wheel and source distribution run: | tox -e ${{ matrix.build-tox-env }} ls -1 dist + - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + deploy-dev: name: Deploy pre-release from dev needs: [run-tests, run-gallery-tests, run-tests-on-conda] if: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - name: Checkout repo with submodules + - name: Checkout repo uses: actions/checkout@v4 - with: - submodules: 'recursive' - name: Set up Python uses: actions/setup-python@v5 From e0fac2b91d510ec1b900e200047926b75a98b1e7 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 10:21:07 -0800 Subject: [PATCH 08/23] Update HDMF_dev.yaml --- .github/workflows/HDMF_dev.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/HDMF_dev.yaml b/.github/workflows/HDMF_dev.yaml index 147c1d7c..bbc39c5c 100644 --- a/.github/workflows/HDMF_dev.yaml +++ b/.github/workflows/HDMF_dev.yaml @@ -32,7 +32,6 @@ jobs: - name: Run HDMF_Zarr Tests run: - pytest -v - - - + # specify the tests directory so that we don't run the hdmf tests which are nested + # under the same base directory + pytest -v tests From b23e617b6bef4faf4e806cc6ee231dd65a4d157a Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 11:32:38 -0800 Subject: [PATCH 09/23] Fix dateutil requirement --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 07bfd50f..0d2d536f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,6 +10,6 @@ hdf5plugin==5.0.0 # hdf5plugin is used to test conversion of plugin filters pre-commit==4.0.1 pytest==8.3.3 pytest-cov==6.0.0 -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 ruff==0.8.0 tox==4.23.2 From 81db7eff665a08c8b2a14797b931a0fabc648909 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 27 Nov 2024 12:57:50 -0800 Subject: [PATCH 10/23] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64ca21f3..6cf71d17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ * Add `force_overwite` parameter for `ZarrIO.__init__` to allow overwriting an existing file or directory. @oruebel [#229](https://github.com/hdmf-dev/hdmf-zarr/pull/229) * Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236) * Remove support for python 3.8. @mavaylon1 [#240](https://github.com/hdmf-dev/hdmf-zarr/pull/240) +* Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. @oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226) ### Bug Fixes * Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232) @@ -21,7 +22,6 @@ * Added test for opening file with consolidated metadata from DANDI. 
@mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206) * Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207) * Added link_data --> clear_cache relationship to support repacking zarr nwbfiles: [#215](https://github.com/hdmf-dev/hdmf-zarr/pull/215) -* Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. @oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226) ## 0.8.0 (June 4, 2024) ### Bug Fixes From 9117094a9b5bcafb636e2b39af7148dd0f51328e Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 17:56:11 -0800 Subject: [PATCH 11/23] Remove requirements files, update tox and workflows --- .github/workflows/deploy_release.yml | 4 +- .github/workflows/run_all_tests.yml | 70 +++++----- .github/workflows/run_tests.yml | 29 ++--- pyproject.toml | 32 ++++- requirements-dev.txt | 15 --- requirements-doc.txt | 6 - requirements-min.txt | 6 +- requirements-opt.txt | 3 - requirements.txt | 9 -- tox.ini | 186 +++++++-------------------- 10 files changed, 124 insertions(+), 236 deletions(-) delete mode 100644 requirements-dev.txt delete mode 100644 requirements-doc.txt delete mode 100644 requirements-opt.txt delete mode 100644 requirements.txt diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 0af20a40..bffde629 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -25,11 +25,11 @@ jobs: - name: Run tox tests run: | - tox -e py313-upgraded + tox -e py313 - name: Build wheel and source distribution run: | - tox -e build-py313-upgraded + tox -e build ls -1 dist - name: Test installation from a wheel diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index c3272466..13a38d2b 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -25,30 +25,27 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } - - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } - - { name: linux-python3.12 , test-tox-env: py312 , build-tox-env: build-py312 , python-ver: "3.12", os: ubuntu-latest } - - { name: linux-python3.13 , test-tox-env: py313 , build-tox-env: build-py313 , python-ver: "3.13", os: ubuntu-latest } - - { name: linux-python3.13-optional , test-tox-env: py313-optional , build-tox-env: build-py313-optional , python-ver: "3.13", os: ubuntu-latest } - - { name: linux-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } - - { name: linux-python3.13-prerelease , test-tox-env: py313-prerelease, build-tox-env: build-py313-prerelease, python-ver: "3.13", os: ubuntu-latest } - - { name: windows-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: windows-latest } - - { name: windows-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: windows-latest } - - { name: windows-python3.12 , test-tox-env: py312 , build-tox-env: build-py312 , python-ver: "3.12", os: 
windows-latest } - - { name: windows-python3.13 , test-tox-env: py313 , build-tox-env: build-py313 , python-ver: "3.13", os: windows-latest } - - { name: windows-python3.13-optional , test-tox-env: py313-optional , build-tox-env: build-py313-optional , python-ver: "3.13", os: windows-latest } - - { name: windows-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: windows-latest } - - { name: windows-python3.13-prerelease, test-tox-env: py313-prerelease, build-tox-env: build-py313-prerelease, python-ver: "3.13", os: windows-latest } - - { name: macos-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: macos-latest } - - { name: macos-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } - - { name: macos-python3.12 , test-tox-env: py312 , build-tox-env: build-py312 , python-ver: "3.12", os: macos-latest } - - { name: macos-python3.13 , test-tox-env: py313 , build-tox-env: build-py313 , python-ver: "3.13", os: macos-latest } - - { name: macos-python3.13-optional , test-tox-env: py313-optional , build-tox-env: build-py313-optional , python-ver: "3.13", os: macos-latest } - - { name: macos-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: macos-latest } - - { name: macos-python3.13-prerelease , test-tox-env: py313-prerelease, build-tox-env: build-py313-prerelease, python-ver: "3.13", os: macos-latest } + - { name: linux-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-python3.10 , test-tox-env: py310 , python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.11 , test-tox-env: py311 , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.12 , test-tox-env: py312 , python-ver: "3.12", os: ubuntu-latest } + - { name: linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-optional , test-tox-env: py313-optional , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.13-prerelease , test-tox-env: py313-prerelease, python-ver: "3.13", os: ubuntu-latest } + - { name: windows-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-python3.10 , test-tox-env: py310 , python-ver: "3.10", os: windows-latest } + - { name: windows-python3.11 , test-tox-env: py311 , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.12 , test-tox-env: py312 , python-ver: "3.12", os: windows-latest } + - { name: windows-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: windows-latest } + - { name: windows-python3.13-optional , test-tox-env: py313-optional , python-ver: "3.13", os: windows-latest } + - { name: windows-python3.13-prerelease, test-tox-env: py313-prerelease, python-ver: "3.13", os: windows-latest } + - { name: macos-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: macos-latest } + - { name: macos-python3.10 , test-tox-env: py310 , python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11 , test-tox-env: py311 , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.12 , test-tox-env: py312 , python-ver: "3.12", os: macos-latest } + - { name: macos-python3.13 , test-tox-env: py313 , python-ver: 
"3.13", os: macos-latest } + - { name: macos-python3.13-optional , test-tox-env: py313-optional , python-ver: "3.13", os: macos-latest } + - { name: macos-python3.13-prerelease , test-tox-env: py313-prerelease, python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo uses: actions/checkout@v4 @@ -70,7 +67,7 @@ jobs: - name: Build wheel and source distribution run: | - tox -e ${{ matrix.build-tox-env }} + tox -e build ls -1 dist - name: Test installation from a wheel @@ -95,13 +92,13 @@ jobs: matrix: include: - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } - - { name: linux-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-gallery-python3.13-optional , test-tox-env: gallery-py313-optional , python-ver: "3.13", os: ubuntu-latest } - { name: linux-gallery-python3.13-prerelease , test-tox-env: gallery-py313-prerelease, python-ver: "3.13", os: ubuntu-latest } - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } - - { name: windows-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: windows-latest } + - { name: windows-gallery-python3.13-optional , test-tox-env: gallery-py313-optional , python-ver: "3.13", os: windows-latest } - { name: windows-gallery-python3.13-prerelease, test-tox-env: gallery-py313-prerelease, python-ver: "3.13", os: windows-latest } - { name: macos-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: macos-latest } - - { name: macos-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: macos-latest } + - { name: macos-gallery-python3.13-optional , test-tox-env: gallery-py313-optional , python-ver: "3.13", os: macos-latest } - { name: macos-gallery-python3.13-prerelease , test-tox-env: gallery-py313-prerelease, python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo @@ -135,14 +132,13 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.8" , os: ubuntu-latest } - - { name: conda-linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } - - { name: conda-linux-python3.12 , test-tox-env: py312 , build-tox-env: build-py312 , python-ver: "3.12", os: ubuntu-latest } - - { name: conda-linux-python3.13 , test-tox-env: py313 , build-tox-env: build-py313 , python-ver: "3.13", os: ubuntu-latest } - - { name: conda-linux-python3.13-optional , test-tox-env: py313-optional , build-tox-env: build-py313-optional , python-ver: "3.13", os: ubuntu-latest } - - { name: conda-linux-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } - - { name: conda-linux-python3.13-prerelease, test-tox-env: py313-prerelease, build-tox-env: build-py313-prerelease, python-ver: "3.13", os: ubuntu-latest } + - { name: conda-linux-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: conda-linux-python3.10 , test-tox-env: py310 , python-ver: "3.10", os: ubuntu-latest } + - { name: conda-linux-python3.11 , test-tox-env: py311 , python-ver: "3.11", 
os: ubuntu-latest } + - { name: conda-linux-python3.12 , test-tox-env: py312 , python-ver: "3.12", os: ubuntu-latest } + - { name: conda-linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest } + - { name: conda-linux-python3.13-optional , test-tox-env: py313-optional , python-ver: "3.13", os: ubuntu-latest } + - { name: conda-linux-python3.13-prerelease, test-tox-env: py313-prerelease, python-ver: "3.13", os: ubuntu-latest } steps: - name: Checkout repo uses: actions/checkout@v4 @@ -173,7 +169,7 @@ jobs: - name: Build wheel and source distribution run: | - tox -e ${{ matrix.build-tox-env }} + tox -e build ls -1 dist - name: Test installation from a wheel diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 707fb97d..82b07f02 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -22,14 +22,13 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } - - { name: linux-python3.13 , test-tox-env: py313 , build-tox-env: build-py313 , python-ver: "3.13", os: ubuntu-latest } + - { name: linux-python3.9-minimum , test-tox-env: py39-minimum, python-ver: "3.9" , os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - - { name: linux-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } - - { name: windows-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: windows-latest } - - { name: windows-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: windows-latest } - - { name: macos-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: macos-latest } - - { name: macos-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: macos-latest } + - { name: linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } + - { name: windows-python3.9-minimum, test-tox-env: py39-minimum, python-ver: "3.9" , os: windows-latest } + - { name: windows-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: windows-latest } + - { name: macos-python3.9-minimum , test-tox-env: py39-minimum, python-ver: "3.9" , os: macos-latest } + - { name: macos-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo uses: actions/checkout@v4 @@ -51,7 +50,7 @@ jobs: - name: Build wheel and source distribution run: | - tox -e ${{ matrix.build-tox-env }} + tox -e build ls -1 dist - name: Test installation from a wheel @@ -82,10 +81,10 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } - - { name: linux-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } - - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } - - { name: windows-gallery-python3.13-upgraded , test-tox-env: gallery-py313-upgraded , python-ver: "3.13", os: windows-latest } + - { name: linux-gallery-python3.9-minimum , test-tox-env: 
gallery-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-gallery-python3.13-optional , test-tox-env: gallery-py313-optional, python-ver: "3.13", os: ubuntu-latest } + - { name: windows-gallery-python3.9-minimum , test-tox-env: gallery-py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-gallery-python3.13-optional, test-tox-env: gallery-py313-optional, python-ver: "3.13", os: windows-latest } steps: - name: Checkout repo uses: actions/checkout@v4 @@ -118,8 +117,8 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.9-minimum , test-tox-env: py39-minimum , build-tox-env: build-py39-minimum , python-ver: "3.9" , os: ubuntu-latest } - - { name: conda-linux-python3.13-upgraded , test-tox-env: py313-upgraded , build-tox-env: build-py313-upgraded , python-ver: "3.13", os: ubuntu-latest } + - { name: conda-linux-python3.9-minimum, test-tox-env: py39-minimum, python-ver: "3.9" , os: ubuntu-latest } + - { name: conda-linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest } steps: - name: Checkout repo uses: actions/checkout@v4 @@ -150,7 +149,7 @@ jobs: - name: Build wheel and source distribution run: | - tox -e ${{ matrix.build-tox-env }} + tox -e build ls -1 dist - name: Test installation from a wheel diff --git a/pyproject.toml b/pyproject.toml index 9b36cba3..3a4cc742 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,12 +28,12 @@ classifiers = [ "Topic :: Scientific/Engineering :: Medical Science Apps." ] dependencies = [ - 'hdmf>=3.14.5', - 'zarr>=2.18.0, <3.0', # pin below 3.0 until HDMF-zarr supports zarr 3.0 - 'numpy>=1.24', - 'numcodecs>=0.10.0', - 'pynwb>=2.8.3', - 'threadpoolctl>=3.1.0', + "hdmf>=3.14.5", + "zarr>=2.18.0, <3.0", # pin below 3.0 until HDMF-zarr supports zarr 3.0 + "numpy>=1.24.0", + "numcodecs>=0.10.0", + "pynwb>=2.8.3", + "threadpoolctl>=3.1.0", ] dynamic = ["version"] @@ -42,6 +42,26 @@ tqdm = ["tqdm>=4.41.0"] fsspec = ["fsspec"] s3fs = ["s3fs"] +# development dependencies +test = [ + "codespell", + "hdf5plugin", # hdf5plugin is used to test conversion of plugin filters + "pre-commit", + "pytest", + "pytest-cov", + "python-dateutil", + "ruff", + "tox", +] + +docs = [ + "matplotlib", + "sphinx>=4", # improved support for docutils>=0.17 + "sphinx_rtd_theme>=1", # <1 does not work with docutils>=0.17 + "sphinx-gallery", + "sphinx-copybutton", +] + [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf-zarr" "Bug Tracker" = "https://github.com/hdmf-dev/hdmf-zarr/issues" diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 0d2d536f..00000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,15 +0,0 @@ -# pinned dependencies to reproduce an entire development environment to use HDMF-Zarr, -# run HDMF-Zarr tests, check code style, -# compute coverage, and create test environments. note that depending on the version of python installed, different -# versions of requirements may be installed due to package incompatibilities. 
-# -black==24.10.0 -codespell==2.3.0 -coverage==7.6.8 -hdf5plugin==5.0.0 # hdf5plugin is used to test conversion of plugin filters -pre-commit==4.0.1 -pytest==8.3.3 -pytest-cov==6.0.0 -python-dateutil==2.9.0.post0 -ruff==0.8.0 -tox==4.23.2 diff --git a/requirements-doc.txt b/requirements-doc.txt deleted file mode 100644 index 4b921319..00000000 --- a/requirements-doc.txt +++ /dev/null @@ -1,6 +0,0 @@ -# dependencies to generate the documentation for HDMF-Zarr -matplotlib -sphinx>=4 # improved support for docutils>=0.17 -sphinx_rtd_theme>=1 # <1 does not work with docutils>=0.17 -sphinx-gallery -sphinx-copybutton diff --git a/requirements-min.txt b/requirements-min.txt index bc954b0a..72bbb64d 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,6 +1,8 @@ +# minimum versions of package dependencies for installing HDMF +# NOTE: these should match the minimum bound for dependencies in pyproject.toml hdmf==3.14.5 -zarr==2.13.0 +zarr==2.18.0 +numpy==1.24.0 numcodecs==0.10.0 pynwb==2.8.3 -setuptools threadpoolctl==3.1.0 diff --git a/requirements-opt.txt b/requirements-opt.txt deleted file mode 100644 index 823b532a..00000000 --- a/requirements-opt.txt +++ /dev/null @@ -1,3 +0,0 @@ -tqdm==4.67.1 -fsspec==2024.10.0 -s3fs==2024.10.0 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index fbbca0b6..00000000 --- a/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -# pinned dependencies to reproduce an entire development environment to use HDMF-ZARR -hdmf==3.14.5 -zarr==2.18.3; python_version >= "3.10" # zarr 2.18.3 dropped support for python 3.9 -zarr==2.18.2; python_version < "3.10" -pynwb==2.8.3 -numpy==2.1.3; python_version >= "3.10" # numpy 2.1.0 dropped support for python 3.9 -numpy==2.0.2; python_version < "3.10" -numcodecs==0.14.1 -threadpoolctl==3.5.0 diff --git a/tox.ini b/tox.ini index 96c145e5..55cd63ce 100644 --- a/tox.ini +++ b/tox.ini @@ -2,201 +2,105 @@ # in multiple virtualenvs. This configuration file will run the # test suite on all supported python versions. To use it, "python -m pip install tox" # and then run "tox" from this directory. +# Tox is used by the GitHub Actions workflow to run tests on multiple environments. [tox] +# NOTE: if the string "py39" is in the environment name, then the py39 interpreter is used, +# so we can omit specifying the basepython version below. 
envlist = py39, py310, py311, py312, py313 -requires = pip >= 22.0 +requires = pip >= 24.3.1 [testenv] download = True setenv = PYTHONDONTWRITEBYTECODE = 1 - VIRTUALENV_PIP = 22.3.1 install_command = - python -m pip install {opts} {packages} - -deps = - -rrequirements-dev.txt - -rrequirements.txt - + python -m pip install -U {opts} {packages} +extras = test commands = - python -m pip check # Check for conflicting packages + python -m pip list + python -m pip check pytest -v # Env to create coverage report locally [testenv:localcoverage] basepython = python3.13 commands = + python -m pip list + python -m pip check pytest --cov=hdmf_zarr coverage html -d tests/coverage/htmlcov -# Test with python 3.13; pinned dev and optional reqs -[testenv:py312-optional] -basepython = python3.13 -install_command = - python -m pip install {opts} {packages} -deps = - {[testenv]deps} - -rrequirements-opt.txt +# Envs that run tests +[testenv:py{39,310,311,312,313}] commands = {[testenv]commands} -# Test with python 3.13; pinned dev and optional reqs; upgraded run reqs -[testenv:py313-upgraded] -basepython = python3.13 -install_command = - python -m pip install -U {opts} {packages} -deps = - -rrequirements-dev.txt - -rrequirements-opt.txt +# Test with python 3.13 and all optional dependencies +[testenv:py313-optional] +extras = {[testenv]extras}, tqdm, fsspec, s3fs commands = {[testenv]commands} -# Test with python 3.13; pinned dev and optional reqs; upgraded, pre-release run reqs +# Test with python 3.13 and all optional dependencies, using pre-release versions [testenv:py313-prerelease] -basepython = python3.13 install_command = python -m pip install -U --pre {opts} {packages} -deps = - -rrequirements-dev.txt - -rrequirements-opt.txt +extras = {[testenv]extras}, tqdm, fsspec, s3fs commands = {[testenv]commands} -# Test with python 3.9; pinned dev reqs; minimum run reqs +# Test with python 3.9 and minimum dependencies [testenv:py39-minimum] -basepython = python3.9 +install_command = + python -m pip install {opts} {packages} deps = - -rrequirements-dev.txt - -rrequirements-min.txt + -r requirements-min.txt commands = {[testenv]commands} + # Envs that builds wheels and source distribution [testenv:build] commands = - python -m pip install --upgrade build + python -m pip install -U build + python -m pip list + python -m pip check python -m build - -[testenv:build-py39] -basepython = python3.9 -commands = {[testenv:build]commands} - -[testenv:build-py310] -basepython = python3.10 -commands = {[testenv:build]commands} - -[testenv:build-py311] -basepython = python3.11 -commands = {[testenv:build]commands} - -[testenv:build-py312] -basepython = python3.12 -commands = {[testenv:build]commands} - -[testenv:build-py313] -basepython = python3.13 -commands = {[testenv:build]commands} - -[testenv:build-py313-optional] -basepython = python3.13 -deps = - {[testenv]deps} - -rrequirements-opt.txt -commands = {[testenv:build]commands} - -[testenv:build-py313-upgraded] -basepython = python3.13 -install_command = - python -m pip install -U {opts} {packages} -deps = - -rrequirements-dev.txt - -rrequirements-opt.txt -commands = {[testenv:build]commands} - -[testenv:build-py313-prerelease] -basepython = python3.13 -install_command = - python -m pip install -U --pre {opts} {packages} -deps = - -rrequirements-dev.txt - -rrequirements-opt.txt -commands = {[testenv:build]commands} - -[testenv:build-py39-minimum] -basepython = python3.9 -deps = - -rrequirements-dev.txt - -rrequirements-min.txt -commands =
{[testenv:build]commands} + # Envs that will test installation from a wheel [testenv:wheelinstall] -deps = null -commands = python -c "import hdmf_zarr" +extras = null +commands = + python -m pip list + python -m pip check + python -c "import hdmf_zarr" + # Envs that will execute gallery tests [testenv:gallery] -install_command = - python -m pip install {opts} {packages} - -deps = - -rrequirements-dev.txt - -rrequirements.txt - -rrequirements-doc.txt - +# NOTE: the gallery tests do not require the "docs" extras commands = + python -m pip list + python -m pip check python test_gallery.py -[testenv:gallery-py39] -basepython = python3.9 -deps = {[testenv:gallery]deps} -commands = {[testenv:gallery]commands} - -[testenv:gallery-py310] -basepython = python3.10 -deps = {[testenv:gallery]deps} +[testenv:gallery-{py39,py310,py311,py312,py313}] commands = {[testenv:gallery]commands} -[testenv:gallery-py311] -basepython = python3.11 -deps = {[testenv:gallery]deps} +# Test with python 3.13 and all optional dependencies +[testenv:gallery-py313-optional] +extras = {[testenv:gallery]extras}, tqdm, fsspec, s3fs commands = {[testenv:gallery]commands} -[testenv:gallery-py312] -basepython = python3.12 -deps = {[testenv:gallery]deps} -commands = {[testenv:gallery]commands} - -[testenv:gallery-py313] -basepython = python3.13 -deps = {[testenv:gallery]deps} -commands = {[testenv:gallery]commands} - -# Test with python 3.12; pinned dev, doc, and optional reqs; upgraded run reqs -[testenv:gallery-py313-upgraded] -basepython = python3.13 -install_command = - python -m pip install -U {opts} {packages} -deps = - -rrequirements-dev.txt - -rrequirements-doc.txt - -rrequirements-opt.txt -commands = {[testenv:gallery]commands} - -# Test with python 3.13; pinned dev, doc, and optional reqs; pre-release run reqs +# Test with python 3.13 and all optional dependencies, using pre-release versions [testenv:gallery-py313-prerelease] -basepython = python3.13 install_command = python -m pip install -U --pre {opts} {packages} -deps = - -rrequirements-dev.txt - -rrequirements-doc.txt - -rrequirements-opt.txt +extras = {[testenv:gallery]extras}, tqdm, fsspec, s3fs commands = {[testenv:gallery]commands} -# Test with python 3.9; pinned dev and doc reqs; minimum run reqs +# Test with python 3.9 and minimum dependencies [testenv:gallery-py39-minimum] -basepython = python3.9 +install_command = + python -m pip install {opts} {packages} deps = - -rrequirements-dev.txt - -rrequirements-min.txt - -rrequirements-doc.txt - -rrequirements-opt.txt + -r requirements-min.txt commands = {[testenv:gallery]commands} From 0fb6a386b5b57127562a07f9e1fa7be22050d6b1 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 17:59:16 -0800 Subject: [PATCH 12/23] Fix rtd --- .readthedocs.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 8d6ffb00..b69025a1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,9 +6,9 @@ version: 2 build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: - python: '3.12' + python: '3.13' # Build documentation in the docs/ directory with Sphinx sphinx: @@ -24,10 +24,7 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: install: - - requirements: requirements-doc.txt - - requirements: requirements.txt - - requirements: requirements-opt.txt - - path: . 
# path to the package relative to the root + - path: .[docs,tqdm,fsspec,s3fs] # path to the package relative to the root # Optionally include all submodules submodules: From bacaa315c8a90cba81608f67d1efd301b5ae9602 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 18:05:07 -0800 Subject: [PATCH 13/23] Update per-PR workflows --- .github/workflows/run_tests.yml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 82b07f02..fe65f0ac 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -22,13 +22,16 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.9-minimum , test-tox-env: py39-minimum, python-ver: "3.9" , os: ubuntu-latest } + - { name: linux-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - - { name: linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } - - { name: windows-python3.9-minimum, test-tox-env: py39-minimum, python-ver: "3.9" , os: windows-latest } - - { name: windows-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: windows-latest } - - { name: macos-python3.9-minimum , test-tox-env: py39-minimum, python-ver: "3.9" , os: macos-latest } - - { name: macos-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: macos-latest } + - { name: linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } + - { name: linux-python3.13-optional, test-tox-env: py313-optional, python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } + - { name: windows-python3.9-minimum, test-tox-env: py39-minimum , python-ver: "3.9" , os: windows-latest } + - { name: windows-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: windows-latest } + - { name: windows-python3.13-optional, test-tox-env: py313-optional, python-ver: "3.13", os: windows-latest } + - { name: macos-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: macos-latest } + - { name: macos-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: macos-latest } + - { name: macos-python3.13-optional, test-tox-env: py313-optional, python-ver: "3.13", os: macos-latest } steps: - name: Checkout repo uses: actions/checkout@v4 @@ -117,8 +120,9 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.9-minimum, test-tox-env: py39-minimum, python-ver: "3.9" , os: ubuntu-latest } - - { name: conda-linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest } + - { name: conda-linux-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: ubuntu-latest } + - { name: conda-linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest } + - { name: conda-linux-python3.13-optional, test-tox-env: py313-optional, python-ver: "3.13", os: ubuntu-latest } steps: - name: Checkout repo uses: actions/checkout@v4 From 3e6d6a62cba16d6d0c6396d4992a1d42df50bc9c Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 18:24:34 -0800 Subject: [PATCH 14/23] Update docs, change optional deps group --- .github/PULL_REQUEST_TEMPLATE/release.md | 10 ++++------ .github/workflows/HDMF_dev.yaml | 3 +-- .github/workflows/check_external_links.yml | 3 +-- .github/workflows/run_coverage.yml | 23 +++++++++++----------- .readthedocs.yaml | 2 +- docs/source/installation.rst | 
16 +++++++-------- pyproject.toml | 14 ++++++++++--- tox.ini | 8 ++++---- 8 files changed, 42 insertions(+), 37 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 0073225e..ad8663af 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -2,10 +2,8 @@ Prepare for release of HDMF-Zarr [version] ### Before merging: - [ ] Make sure all PRs to be included in this release have been merged to `dev`. -- [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, - `requirements-doc.txt`, and `requirements-opt.txt` to the latest versions, - and update dependency ranges in `pyproject.toml` and minimums in `requirements-min.txt` as needed. - Run `pip install pur && pur -r -d [requirements file]` to see which packages can be updated. +- [ ] Major and minor releases: Update dependency ranges in `pyproject.toml` and minimums in + `requirements-min.txt` as needed. - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed - [ ] Update `pyproject.toml` as needed @@ -27,5 +25,5 @@ Prepare for release of HDMF-Zarr [version] 4. Either monitor [conda-forge/hdmf_zarr-feedstock](https://github.com/conda-forge/hdmf_zarr-feedstock) for the regro-cf-autotick-bot bot to create a PR updating the version of HDMF to the latest PyPI release, usually within 24 hours of release, or manually create a PR updating `recipe/meta.yaml` with the latest version number - and SHA256 retrieved from PyPI > HDMF-Zarr > Download Files > View hashes for the `.tar.gz` file. Re-render and update - dependencies as needed. + and SHA256 retrieved from PyPI > HDMF-Zarr > Download Files > View hashes for the `.tar.gz` file. Re-render and + update the dependencies as needed. diff --git a/.github/workflows/HDMF_dev.yaml b/.github/workflows/HDMF_dev.yaml index bbc39c5c..4500b792 100644 --- a/.github/workflows/HDMF_dev.yaml +++ b/.github/workflows/HDMF_dev.yaml @@ -19,8 +19,7 @@ jobs: - name: Install HDMF_Zarr Requirements run: | - python -m pip install -r requirements-dev.txt -r requirements.txt - pip install . + python -m pip install ".[test]" - name: Clone and Install HDMF Dev Branch run: | diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index 18c85d73..1d2e2b65 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -23,8 +23,7 @@ jobs: - name: Install Sphinx dependencies and package run: | python -m pip install --upgrade pip - python -m pip install -r requirements-doc.txt -r requirements.txt -r requirements-opt.txt - python -m pip install . + python -m pip install ".[test,docs,full]" - name: Check Sphinx external links run: sphinx-build -W -b linkcheck ./docs/source ./test_build diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index 21a87cbf..20011920 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -41,29 +41,30 @@ jobs: with: python-version: ${{ env.PYTHON }} - - name: Install dependencies + - name: Upgrade pip run: | python -m pip install --upgrade pip - python -m pip install -r requirements-dev.txt -r requirements.txt - - - name: Install optional dependencies - if: ${{ matrix.opt_req }} - run: python -m pip install -r requirements-opt.txt - name: Install package + if: ! 
${{ matrix.opt_req }} run: | - python -m pip install . - python -m pip list + python -m pip install ".[test]" + + - name: Install package with optional dependencies + if: ${{ matrix.opt_req }} + run: python -m pip install ".[test,full]" - name: Run tests and generate coverage report run: | # coverage is configured in pyproject.toml - pytest --cov --cov-report=xml --cov-report=term # codecov uploader requires xml format + # codecov uploader requires xml format + python -m pip list + pytest --cov --cov-report=xml --cov-report=term - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: fail_ci_if_error: true - file: ./coverage.xml + files: ./coverage.xml env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b69025a1..a200e362 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -24,7 +24,7 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: install: - - path: .[docs,tqdm,fsspec,s3fs] # path to the package relative to the root + - path: .[docs,full] # path to the package relative to the root # Optionally include all submodules submodules: diff --git a/docs/source/installation.rst b/docs/source/installation.rst index fc55f947..e203f2d7 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -24,14 +24,15 @@ For Developers Install hdmf-zarr from GitHub ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The following illustrates how to install both ``hdmf`` and ``hdfm_zarr`` from GitHub -in a Conda environment. Normally we don't need to install ``hdmf`` directly, but until -``hdmf 3.4.0`` is released we need to use the ``dev`` version of ``hdmf``. +The following illustrates how to install both ``hdmf`` and ``hdmf_zarr`` from GitHub +in a Conda environment, with all of the optional, testing, and documentation dependencies +for hdmf-zarr. Normally, we don't need to install ``hdmf`` directly, but it is +often useful to use the ``dev`` branch of the ``hdmf`` GitHub repository. .. code-block:: - conda create --name hdmf-zarr-test python=3.9 - conda activate hdmf-zarr-test + conda create --name hdmf-zarr-dev python=3.13 + conda activate hdmf-zarr-dev git clone --recurse-submodules https://github.com/hdmf-dev/hdmf.git cd hdmf @@ -41,12 +42,11 @@ in a Conda environment. Normally we don't need to install ``hdmf`` directly, but git clone https://github.com/hdmf-dev/hdmf-zarr.git cd hdmf-zarr - pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt - pip install -e . + pip install -e ".[all]" .. note:: - Depending on versions, it is possible that when installing ``hdmf-zarr`` that pip will + Depending on versions, it is possible that when installing ``hdmf-zarr``, that ``pip`` will install HDMF directly from PyPI instead of using the development version of HDMF that is already installed. 
In that case call ``pip uninstall hdmf`` and go to the ``hdmf`` directory and run ``pip install -e .`` again diff --git a/pyproject.toml b/pyproject.toml index 3a4cc742..25adfcce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,9 +38,12 @@ dependencies = [ dynamic = ["version"] [project.optional-dependencies] -tqdm = ["tqdm>=4.41.0"] -fsspec = ["fsspec"] -s3fs = ["s3fs"] +# all optional dependencies +full = [ + "tqdm>=4.41.0", + "fsspec", + "s3fs", +] # development dependencies test = [ @@ -54,6 +57,7 @@ test = [ "tox", ] +# documentation dependencies docs = [ "matplotlib", "sphinx>=4", # improved support for docutils>=0.17 @@ -62,6 +66,10 @@ docs = [ "sphinx-copybutton", ] +# all possible dependencies +all = ["hdmf-zarr[full,test,docs]"] + + [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf-zarr" "Bug Tracker" = "https://github.com/hdmf-dev/hdmf-zarr/issues" diff --git a/tox.ini b/tox.ini index 55cd63ce..22e02a64 100644 --- a/tox.ini +++ b/tox.ini @@ -37,14 +37,14 @@ commands = {[testenv]commands} # Test with python 3.13 and all optional dependencies [testenv:py313-optional] -extras = {[testenv]extras}, tqdm, fsspec, s3fs +extras = {[testenv]extras}, full commands = {[testenv]commands} # Test with python 3.13 and all optional dependencies, using pre-release versions [testenv:py313-prerelease] install_command = python -m pip install -U --pre {opts} {packages} -extras = {[testenv]extras}, tqdm, fsspec, s3fs +extras = {[testenv]extras}, full commands = {[testenv]commands} # Test with python 3.9 and minimum dependencies @@ -87,14 +87,14 @@ commands = {[testenv:gallery]commands} # Test with python 3.13 and all optional dependencies [testenv:gallery-py313-optional] -extras = {[testenv:gallery]extras}, tqdm, fsspec, s3fs +extras = {[testenv:gallery]extras}, full commands = {[testenv:gallery]commands} # Test with python 3.13 and all optional dependencies, using pre-release versions [testenv:gallery-py313-prerelease] install_command = python -m pip install -U --pre {opts} {packages} -extras = {[testenv:gallery]extras}, tqdm, fsspec, s3fs +extras = {[testenv:gallery]extras}, full commands = {[testenv:gallery]commands} # Test with python 3.9 and minimum dependencies From 5e7f4599a52ec96617e54b7d5d9d2cd0bd216720 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 18:36:53 -0800 Subject: [PATCH 15/23] Configure dependabot --- .github/dependabot.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..24615639 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,18 @@ +version: 2 +updates: + # disable checking python requirements files because there are too + # many updates and dependabot will not ignore requirements-min.txt + # until https://github.com/dependabot/dependabot-core/issues/2883 is resolved + # workaround is to continue updating these files manually + + # - package-ecosystem: "pip" + # directory: "/" + # schedule: + # # Check for updates to requirements files and pyproject.toml every week + # interval: "weekly" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" From 0cac2d734c7ab932c0e6da1ebb9ec36c6d8ae64a Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 18:37:42 -0800 Subject: [PATCH 16/23] Fix workflow --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index fe65f0ac..521995c1 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -25,7 +25,7 @@ jobs: - { name: linux-python3.9-minimum , test-tox-env: py39-minimum , python-ver: "3.9" , os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - { name: linux-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } - - { name: linux-python3.13-optional, test-tox-env: py313-optional, python-ver: "3.13", os: ubuntu-latest , upload-wheels: true } + - { name: linux-python3.13-optional, test-tox-env: py313-optional, python-ver: "3.13", os: ubuntu-latest } - { name: windows-python3.9-minimum, test-tox-env: py39-minimum , python-ver: "3.9" , os: windows-latest } - { name: windows-python3.13 , test-tox-env: py313 , python-ver: "3.13", os: windows-latest } - { name: windows-python3.13-optional, test-tox-env: py313-optional, python-ver: "3.13", os: windows-latest } From 6bbbce2442d5d858a5c53ef02dfa8bd3bf258038 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 18:40:24 -0800 Subject: [PATCH 17/23] Update changelog --- CHANGELOG.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cf71d17..1a76895f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # HDMF-ZARR Changelog -## 1.0.0 (November 21, 2024) +## 0.10.0 (Upcoming) ### Enhancements * Added initial refactor of export, supporting references and internal/external links from Zarr to Zarr. This will introduce breaking changes that could lead to existing exported files to be invalid. This update removes '.' as the object default file source. @mavaylon1 [#194](https://github.com/hdmf-dev/hdmf-zarr/pull/194) @@ -10,12 +10,16 @@ * Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236) * Remove support for python 3.8. @mavaylon1 [#240](https://github.com/hdmf-dev/hdmf-zarr/pull/240) * Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. @oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226) +* Updated optional dependency groups in `pyproject.toml` and GitHub Actions workflows. @rly, @mavaylon1 [#239](https://github.com/hdmf-dev/hdmf-zarr/pull/239) +* Added testing for Python 3.13. @rly, @mavaylon1 [#239](https://github.com/hdmf-dev/hdmf-zarr/pull/239) ### Bug Fixes * Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232) * Fix hiding of pynwb compatibility errors. @rly [242](https://github.com/hdmf-dev/hdmf-zarr/pull/242) + ## 0.9.0 (September 16, 2024) + ### Enhancements * Added support for appending a dataset of references. @mavaylon1 [#203](https://github.com/hdmf-dev/hdmf-zarr/pull/203) * NWBZarrIO load_namespaces=True by default. @mavaylon1 [#204](https://github.com/hdmf-dev/hdmf-zarr/pull/204) @@ -23,12 +27,16 @@ * Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207) * Added link_data --> clear_cache relationship to support repacking zarr nwbfiles: [#215](https://github.com/hdmf-dev/hdmf-zarr/pull/215) + ## 0.8.0 (June 4, 2024) + ### Bug Fixes * Fixed bug when opening a file in with `mode=r+`. The file will open without using the consolidated metadata. 
@mavaylon1 [#182](https://github.com/hdmf-dev/hdmf-zarr/issues/182) * Fixed bug on how we access scalar arrays. Added warning filter for Zarr deprecation of NestedDirectoryStore. Fixed bug on how we write a dataset of references. @mavaylon1 [#195](https://github.com/hdmf-dev/hdmf-zarr/pull/195) + ## 0.7.0 (May 2, 2024) + ### Enhancements * Added support for python 3.12. @mavaylon1 [#172](https://github.com/hdmf-dev/hdmf-zarr/pull/172) * Added support for forcing read of files without consolidated metadata using `mode=r-` in `ZarrIO`. @oruebel [#183](https://github.com/hdmf-dev/hdmf-zarr/pull/183) @@ -41,6 +49,7 @@ * Fixed bug in `ZarrIO.__open_file_consolidated` that led to remote files being opened without consolidated metadata. @oruebel [#184](https://github.com/hdmf-dev/hdmf-zarr/pull/184) * Fixed minor bug where `ZarrIO.__open_file_consolidated` used properties of `ZarrIO` instead of the provided input parameters. @oruebel [#183](https://github.com/hdmf-dev/hdmf-zarr/pull/183) + ## 0.6.0 (February 21, 2024) ### Enhancements From 802ad392a674fcf52a8c403f85267e70cd3ef524 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 18:42:19 -0800 Subject: [PATCH 18/23] Update changelog --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a76895f..639958cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,9 @@ * Updated packages used for testing and readthedocs configuration. @mavaylon1, @rly [#214](https://github.com/hdmf-dev/hdmf-zarr/pull/214) * Add `force_overwite` parameter for `ZarrIO.__init__` to allow overwriting an existing file or directory. @oruebel [#229](https://github.com/hdmf-dev/hdmf-zarr/pull/229) * Remove allowance of `hdmf.Array` in `__init__` of `AbstractZarrTableDataset` and `ZarrDataset` to be compatible with HDMF 4.0. @rly [#236](https://github.com/hdmf-dev/hdmf-zarr/pull/236) -* Remove support for python 3.8. @mavaylon1 [#240](https://github.com/hdmf-dev/hdmf-zarr/pull/240) +* Remove support for python 3.8 and added testing for Python 3.13. @mavaylon1 [#240](https://github.com/hdmf-dev/hdmf-zarr/pull/240) * Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. @oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226) * Updated optional dependency groups in `pyproject.toml` and GitHub Actions workflows. @rly, @mavaylon1 [#239](https://github.com/hdmf-dev/hdmf-zarr/pull/239) -* Added testing for Python 3.13. @rly, @mavaylon1 [#239](https://github.com/hdmf-dev/hdmf-zarr/pull/239) ### Bug Fixes * Fix reading of cached specs and caching of specs during export. 
@rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232) From fea4efd83828cb12f6dac4ba376e779e90234096 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 19:27:25 -0800 Subject: [PATCH 19/23] Apply black to src directory --- .pre-commit-config.yaml | 10 +- MANIFEST.in | 4 - pyproject.toml | 26 +- src/hdmf_zarr/__init__.py | 16 +- src/hdmf_zarr/_due.py | 37 +- src/hdmf_zarr/backend.py | 806 ++++++++++++++++++++---------------- src/hdmf_zarr/nwb.py | 81 ++-- src/hdmf_zarr/utils.py | 227 +++++----- src/hdmf_zarr/zarr_utils.py | 42 +- 9 files changed, 695 insertions(+), 554 deletions(-) delete mode 100644 MANIFEST.in diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 137ee188..d1fb0d7f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,18 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 + rev: v5.0.0 hooks: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace + - id: check-added-large-files + - id: check-json + - id: check-toml + - id: name-tests-test + args: [--pytest-test-first] + - id: check-docstring-first - repo: https://github.com/psf/black - rev: 22.6.0 + rev: 24.10.0 hooks: - id: black exclude: ^docs/ diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index de5b2302..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,4 +0,0 @@ -include LICENSE.txt versioneer.py src/hdmf_zarr/_version.py src/hdmf_zarr/_due.py -include requirements.txt requirements-dev.txt requirements-doc.txt requirements-opt.txt -include test.py tox.ini -graft tests diff --git a/pyproject.toml b/pyproject.toml index 25adfcce..65be5d24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,31 +109,9 @@ omit = [ [tool.black] line-length = 120 -target-version = ['py38'] +target-version = ['py313'] include = '\.pyi?$' -extend-exclude = ''' -/( - \.toml - |\.yml - |\.txt - |\.sh - |\.git - |\.ini - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | build - | dist -)/ -''' -force-exclude = ''' -/( - /*.txt - /docs - /docs/* -)\ -''' +force-exclude = 'docs/*' [tool.ruff] lint.select = ["E", "F", "T100", "T201", "T203"] diff --git a/src/hdmf_zarr/__init__.py b/src/hdmf_zarr/__init__.py index 6a33ab4b..3866afbb 100644 --- a/src/hdmf_zarr/__init__.py +++ b/src/hdmf_zarr/__init__.py @@ -12,9 +12,14 @@ __version__ = version("hdmf") del version +__all__ = ["ZarrIO", "ZarrDataIO", "NWBZarrIO"] + # Duecredit definitions from ._due import due, BibTeX # noqa: E402 -due.cite(BibTeX(""" + +due.cite( + BibTeX( + """ @INPROCEEDINGS{9005648, author={A. J. {Tritt} and O. {Rübel} and B. {Dichter} and R. {Ly} and D. {Kang} and E. F. {Chang} and L. M. {Frank} and K. 
{Bouchard}}, booktitle={2019 IEEE International Conference on Big Data (Big Data)}, @@ -24,6 +29,11 @@ number={}, pages={165-179}, doi={10.1109/BigData47090.2019.9005648}} -"""), description="HDMF: Hierarchical Data Modeling Framework for Modern Science Data Standards", # noqa: E501 - path="hdmf/", version=__version__, cite_module=True) +""" + ), + description="HDMF: Hierarchical Data Modeling Framework for Modern Science Data Standards", # noqa: E501 + path="hdmf/", + version=__version__, + cite_module=True, +) del due, BibTeX diff --git a/src/hdmf_zarr/_due.py b/src/hdmf_zarr/_due.py index f729f843..f5d20fe9 100644 --- a/src/hdmf_zarr/_due.py +++ b/src/hdmf_zarr/_due.py @@ -1,6 +1,10 @@ # emacs: at the end of the file # ex: set sts=4 ts=4 sw=4 et: # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### # +from __future__ import annotations + +from typing import Any + """ Stub file for a guaranteed safe import of duecredit constructs: if duecredit @@ -24,46 +28,49 @@ License: BSD-2 """ -__version__ = '0.0.9' +__version__ = "0.0.9" -class InactiveDueCreditCollector(object): +class InactiveDueCreditCollector: """Just a stub at the Collector which would not do anything""" - def _donothing(self, *args, **kwargs): + + def _donothing(self, *_args: Any, **_kwargs: Any) -> None: """Perform no good and no bad""" pass - def dcite(self, *args, **kwargs): + def dcite(self, *_args: Any, **_kwargs: Any): """If I could cite I would""" + def nondecorating_decorator(func): return func + return nondecorating_decorator active = False activate = add = cite = dump = load = _donothing - def __repr__(self): - return self.__class__.__name__ + '()' + def __repr__(self) -> str: + return self.__class__.__name__ + "()" -def _donothing_func(*args, **kwargs): +def _donothing_func(*args: Any, **kwargs: Any) -> None: """Perform no good and no bad""" pass try: - from duecredit import due, BibTeX, Doi, Url, Text # lgtm [py/unused-import] - if 'due' in locals() and not hasattr(due, 'cite'): - raise RuntimeError( - "Imported due lacks .cite. DueCredit is now disabled") + from duecredit import BibTeX, Doi, Text, Url, due # lgtm [py/unused-import] + + if "due" in locals() and not hasattr(due, "cite"): + raise RuntimeError("Imported due lacks .cite. 
DueCredit is now disabled") except Exception as e: if not isinstance(e, ImportError): import logging - logging.getLogger("duecredit").error( - "Failed to import duecredit due to %s" % str(e)) + + logging.getLogger("duecredit").error("Failed to import duecredit due to %s" % str(e)) # Initiate due stub - due = InactiveDueCreditCollector() - BibTeX = Doi = Url = Text = _donothing_func + due = InactiveDueCreditCollector() # type: ignore + BibTeX = Doi = Url = Text = _donothing_func # type: ignore # Emacs mode definitions # Local Variables: diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index e209b8c0..3b8de130 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -1,4 +1,5 @@ """Module with the Zarr-based I/O-backend for HDMF""" + # Python imports import os import shutil @@ -11,41 +12,21 @@ import zarr from zarr.hierarchy import Group from zarr.core import Array -from zarr.storage import (DirectoryStore, - TempStore, - NestedDirectoryStore, - ConsolidatedMetadataStore) +from zarr.storage import DirectoryStore, TempStore, NestedDirectoryStore, ConsolidatedMetadataStore import numcodecs # HDMF-ZARR imports -from .utils import (ZarrDataIO, - ZarrReference, - ZarrSpecWriter, - ZarrSpecReader, - ZarrIODataChunkIteratorQueue) +from .utils import ZarrDataIO, ZarrReference, ZarrSpecWriter, ZarrSpecReader, ZarrIODataChunkIteratorQueue from .zarr_utils import BuilderZarrReferenceDataset, BuilderZarrTableDataset # HDMF imports from hdmf.backends.io import HDMFIO from hdmf.backends.errors import UnsupportedOperation -from hdmf.backends.utils import (NamespaceToBuilderHelper, - WriteStatusTracker) -from hdmf.utils import (docval, - getargs, - popargs, - get_docval, - get_data_shape) -from hdmf.build import (Builder, - GroupBuilder, - DatasetBuilder, - LinkBuilder, - BuildManager, - ReferenceBuilder, - TypeMap) +from hdmf.backends.utils import NamespaceToBuilderHelper, WriteStatusTracker +from hdmf.utils import docval, getargs, popargs, get_docval, get_data_shape +from hdmf.build import Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, ReferenceBuilder, TypeMap from hdmf.data_utils import AbstractDataChunkIterator -from hdmf.spec import (RefSpec, - DtypeSpec, - NamespaceCatalog) +from hdmf.spec import RefSpec, DtypeSpec, NamespaceCatalog from hdmf.query import HDMFDataset from hdmf.container import Container @@ -53,24 +34,22 @@ # Module variables -ROOT_NAME = 'root' +ROOT_NAME = "root" """ Name of the root builder for read/write """ -SPEC_LOC_ATTR = '.specloc' +SPEC_LOC_ATTR = ".specloc" """ Reserved attribute storing the path to the Group where the schema for the file are cached """ -DEFAULT_SPEC_LOC_DIR = 'specifications' +DEFAULT_SPEC_LOC_DIR = "specifications" """ Default name of the group where specifications should be cached """ -SUPPORTED_ZARR_STORES = (DirectoryStore, - TempStore, - NestedDirectoryStore) +SUPPORTED_ZARR_STORES = (DirectoryStore, TempStore, NestedDirectoryStore) """ Tuple listing all Zarr storage backends supported by ZarrIO """ @@ -87,34 +66,69 @@ def can_read(path): except Exception: return False - @docval({'name': 'path', - 'type': (str, Path, *SUPPORTED_ZARR_STORES), - 'doc': 'the path to the Zarr file or a supported Zarr store'}, - {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, - {'name': 'mode', 'type': str, - 'doc': 'the mode to open the Zarr file with, one of ("w", "r", "r+", "a", "r-"). 
' - 'the mode r- is used to force open without consolidated metadata in read only mode.'}, - {'name': 'synchronizer', 'type': (zarr.ProcessSynchronizer, zarr.ThreadSynchronizer, bool), - 'doc': 'Zarr synchronizer to use for parallel I/O. If set to True a ProcessSynchronizer is used.', - 'default': None}, - {'name': 'object_codec_class', 'type': None, - 'doc': 'Set the numcodec object codec class to be used to encode objects.' - 'Use numcodecs.pickles.Pickle by default.', - 'default': None}, - {'name': 'storage_options', 'type': dict, - 'doc': 'Zarr storage options to read remote folders', - 'default': None}, - {'name': 'force_overwrite', - 'type': bool, - 'doc': "force overwriting existing object when in 'w' mode. The existing file or directory" - " will be deleted when before opening (even if the object is not Zarr, e.g,. an HDF5 file)", - 'default': False} - ) + @docval( + { + "name": "path", + "type": (str, Path, *SUPPORTED_ZARR_STORES), + "doc": "the path to the Zarr file or a supported Zarr store", + }, + { + "name": "manager", + "type": BuildManager, + "doc": "the BuildManager to use for I/O", + "default": None, + }, + { + "name": "mode", + "type": str, + "doc": ( + 'the mode to open the Zarr file with, one of ("w", "r", "r+", "a", "r-"). ' + "the mode r- is used to force open without consolidated metadata in read only mode." + ), + }, + { + "name": "synchronizer", + "type": (zarr.ProcessSynchronizer, zarr.ThreadSynchronizer, bool), + "doc": "Zarr synchronizer to use for parallel I/O. If set to True a ProcessSynchronizer is used.", + "default": None, + }, + { + "name": "object_codec_class", + "type": None, + "doc": ( + "Set the numcodec object codec class to be used to encode objects." + "Use numcodecs.pickles.Pickle by default." + ), + "default": None, + }, + { + "name": "storage_options", + "type": dict, + "doc": "Zarr storage options to read remote folders", + "default": None, + }, + { + "name": "force_overwrite", + "type": bool, + "doc": ( + "force overwriting existing object when in 'w' mode. The existing file or directory" + " will be deleted when before opening (even if the object is not Zarr, e.g,. an HDF5 file)" + ), + "default": False, + }, + ) def __init__(self, **kwargs): - self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) + self.logger = logging.getLogger("%s.%s" % (self.__class__.__module__, self.__class__.__qualname__)) path, manager, mode, synchronizer, object_codec_class, storage_options, force_overwrite = popargs( - 'path', 'manager', 'mode', 'synchronizer', 'object_codec_class', - 'storage_options', 'force_overwrite', kwargs) + "path", + "manager", + "mode", + "synchronizer", + "object_codec_class", + "storage_options", + "force_overwrite", + kwargs, + ) if manager is None: manager = BuildManager(TypeMap(NamespaceCatalog())) if isinstance(synchronizer, bool): @@ -184,31 +198,35 @@ def open(self): if self.__file is None: # Allow overwriting an existing file (e.g., an HDF5 file). Zarr will normally fail if the # existing object at the path is a file. So if we are in `w` mode we need to delete the file first - if self.mode == 'w' and self.__force_overwrite: + if self.mode == "w" and self.__force_overwrite: if isinstance(self.path, (str, Path)) and os.path.exists(self.path): - if os.path.isdir(self.path): # directory + if os.path.isdir(self.path): # directory shutil.rmtree(self.path) else: # File os.remove(self.path) # Within zarr, open_consolidated only allows the mode to be 'r' or 'r+'. 
# As a result, when in other modes, the file will not use consolidated metadata. - if self.mode != 'r': + if self.mode != "r": # When we consolidate metadata, we use ConsolidatedMetadataStore. # This interface does not allow for setting items. # In the doc string, it says it is "read only". As a result, we cannot use r+ with consolidate_metadata. # r- is only an internal mode in ZarrIO to force the use of regular open. For Zarr we need to # use the regular mode r when r- is specified - mode_to_use = self.mode if self.mode != 'r-' else 'r' - self.__file = zarr.open(store=self.path, - mode=mode_to_use, - synchronizer=self.__synchronizer, - storage_options=self.__storage_options) + mode_to_use = self.mode if self.mode != "r-" else "r" + self.__file = zarr.open( + store=self.path, + mode=mode_to_use, + synchronizer=self.__synchronizer, + storage_options=self.__storage_options, + ) else: - self.__file = self.__open_file_consolidated(store=self.path, - mode=self.mode, - synchronizer=self.__synchronizer, - storage_options=self.__storage_options) + self.__file = self.__open_file_consolidated( + store=self.path, + mode=self.mode, + synchronizer=self.__synchronizer, + storage_options=self.__storage_options, + ) def close(self): """Close the Zarr file""" @@ -218,29 +236,38 @@ def close(self): def is_remote(self): """Return True if the file is remote, False otherwise""" from zarr.storage import FSStore + if isinstance(self.file.store, FSStore): return True else: return False @classmethod - @docval({'name': 'namespace_catalog', - 'type': (NamespaceCatalog, TypeMap), - 'doc': 'the NamespaceCatalog or TypeMap to load namespaces into'}, - {'name': 'path', - 'type': (str, Path, *SUPPORTED_ZARR_STORES), - 'doc': 'the path to the Zarr file or a supported Zarr store'}, - {'name': 'storage_options', 'type': dict, - 'doc': 'Zarr storage options to read remote folders', - 'default': None}, - {'name': 'namespaces', 'type': list, 'doc': 'the namespaces to load', 'default': None} - ) + @docval( + { + "name": "namespace_catalog", + "type": (NamespaceCatalog, TypeMap), + "doc": "the NamespaceCatalog or TypeMap to load namespaces into", + }, + { + "name": "path", + "type": (str, Path, *SUPPORTED_ZARR_STORES), + "doc": "the path to the Zarr file or a supported Zarr store", + }, + { + "name": "storage_options", + "type": dict, + "doc": "Zarr storage options to read remote folders", + "default": None, + }, + {"name": "namespaces", "type": list, "doc": "the namespaces to load", "default": None}, + ) def load_namespaces(cls, namespace_catalog, path, storage_options, namespaces=None): - ''' + """ Load cached namespaces from a file. - ''' + """ # TODO: how to use storage_options here? 
- f = zarr.open(path, mode='r', storage_options=storage_options) + f = zarr.open(path, mode="r", storage_options=storage_options) if SPEC_LOC_ATTR not in f.attrs: msg = "No cached namespaces found in %s" % path warnings.warn(msg) @@ -253,17 +280,26 @@ def load_namespaces(cls, namespace_catalog, path, storage_options, namespaces=No latest_version = list(ns_group.keys())[-1] ns_group = ns_group[latest_version] reader = ZarrSpecReader(ns_group) - namespace_catalog.load_namespaces('namespace', reader=reader) + namespace_catalog.load_namespaces("namespace", reader=reader) @docval( - {'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, - {'name': 'cache_spec', 'type': bool, 'doc': 'cache specification to file', 'default': True}, - {'name': 'link_data', 'type': bool, - 'doc': 'If not specified otherwise link (True) or copy (False) Datasets', 'default': True}, - {'name': 'exhaust_dci', 'type': bool, - 'doc': 'exhaust DataChunkIterators one at a time. If False, add ' + - 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', - 'default': True}, + {"name": "container", "type": Container, "doc": "the Container object to write"}, + {"name": "cache_spec", "type": bool, "doc": "cache specification to file", "default": True}, + { + "name": "link_data", + "type": bool, + "doc": "If not specified otherwise link (True) or copy (False) Datasets", + "default": True, + }, + { + "name": "exhaust_dci", + "type": bool, + "doc": ( + "exhaust DataChunkIterators one at a time. If False, add " + "them to the internal queue self.__dci_queue and exhaust them concurrently at the end" + ), + "default": True, + }, { "name": "number_of_jobs", "type": int, @@ -276,9 +312,7 @@ def load_namespaces(cls, namespace_catalog, path, storage_options, namespaces=No { "name": "max_threads_per_process", "type": int, - "doc": ( - "Limits the number of threads used by each process. The default is None (no limits)." - ), + "doc": ("Limits the number of threads used by each process. The default is None (no limits)."), "default": None, }, { @@ -293,11 +327,9 @@ def load_namespaces(cls, namespace_catalog, path, storage_options, namespaces=No { "name": "consolidate_metadata", "type": bool, - "doc": ( - "Consolidate metadata into a single .zmetadata file in the root group to accelerate read." 
- ), + "doc": ("Consolidate metadata into a single .zmetadata file in the root group to accelerate read."), "default": True, - } + }, ) def write(self, **kwargs): """Overwrite the write method to add support for caching the specification and parallelization.""" @@ -330,16 +362,16 @@ def __cache_spec(self): ns_builder = NamespaceToBuilderHelper.convert_namespace(ns_catalog, ns_name) namespace = ns_catalog.get_namespace(ns_name) if namespace.version is None: - group_name = '%s/unversioned' % ns_name + group_name = "%s/unversioned" % ns_name else: - group_name = '%s/%s' % (ns_name, namespace.version) + group_name = "%s/%s" % (ns_name, namespace.version) ns_group = spec_group.require_group(group_name) writer = ZarrSpecWriter(ns_group) - ns_builder.export('namespace', writer=writer) + ns_builder.export("namespace", writer=writer) @docval( *get_docval(HDMFIO.export), - {'name': 'cache_spec', 'type': bool, 'doc': 'whether to cache the specification to file', 'default': True}, + {"name": "cache_spec", "type": bool, "doc": "whether to cache the specification to file", "default": True}, { "name": "number_of_jobs", "type": int, @@ -352,9 +384,7 @@ def __cache_spec(self): { "name": "max_threads_per_process", "type": int, - "doc": ( - "Limits the number of threads used by each process. The default is None (no limits)." - ), + "doc": "Limits the number of threads used by each process. The default is None (no limits).", "default": None, }, { @@ -371,12 +401,13 @@ def export(self, **kwargs): """Export data read from a file from any backend to Zarr. See :py:meth:`hdmf.backends.io.HDMFIO.export` for more details. """ - if self.mode != 'w': - raise UnsupportedOperation("Cannot export to file %s in mode '%s'. Please use mode 'w'." - % (self.source, self.mode)) + if self.mode != "w": + raise UnsupportedOperation( + "Cannot export to file %s in mode '%s'. Please use mode 'w'." % (self.source, self.mode) + ) - src_io = getargs('src_io', kwargs) - write_args, cache_spec = popargs('write_args', 'cache_spec', kwargs) + src_io = getargs("src_io", kwargs) + write_args, cache_spec = popargs("write_args", "cache_spec", kwargs) number_of_jobs, max_threads_per_process, multiprocessing_context = popargs( "number_of_jobs", "max_threads_per_process", "multiprocessing_context", kwargs ) @@ -387,24 +418,25 @@ def export(self, **kwargs): multiprocessing_context=multiprocessing_context, ) - if not isinstance(src_io, ZarrIO) and write_args.get('link_data', True): - raise UnsupportedOperation(f"Cannot export from non-Zarr backend { src_io.__class__.__name__} " + - "to Zarr with write argument link_data=True. " - + "Set write_args={'link_data': False}") + if not isinstance(src_io, ZarrIO) and write_args.get("link_data", True): + raise UnsupportedOperation( + f"Cannot export from non-Zarr backend { src_io.__class__.__name__} " + "to Zarr with write argument link_data=True. 
" + "Set write_args={'link_data': False}" + ) - write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder + write_args["export_source"] = src_io.source # pass export_source=src_io.source to write_builder ckwargs = kwargs.copy() - ckwargs['write_args'] = write_args - if not write_args.get('link_data', True): - ckwargs['clear_cache'] = True + ckwargs["write_args"] = write_args + if not write_args.get("link_data", True): + ckwargs["clear_cache"] = True super().export(**ckwargs) if cache_spec: # add any namespaces from the src_io that have not yet been loaded for namespace in src_io.manager.namespace_catalog.namespaces: if namespace not in self.manager.namespace_catalog.namespaces: self.manager.namespace_catalog.add_namespace( - name=namespace, - namespace=src_io.manager.namespace_catalog.get_namespace(namespace) + name=namespace, namespace=src_io.manager.namespace_catalog.get_namespace(namespace) ) self.__cache_spec() @@ -426,61 +458,60 @@ def get_written(self, builder, check_on_disk=False): written = written and self.get_builder_exists_on_disk(builder=builder) return written - @docval({'name': 'builder', 'type': Builder, 'doc': 'The builder of interest'}) + @docval({"name": "builder", "type": Builder, "doc": "The builder of interest"}) def get_builder_exists_on_disk(self, **kwargs): """ Convenience function to check whether a given builder exists on disk in this Zarr file. """ - builder = getargs('builder', kwargs) + builder = getargs("builder", kwargs) builder_path = self.get_builder_disk_path(builder=builder, filepath=None) exists_on_disk = os.path.exists(builder_path) return exists_on_disk - @docval({'name': 'builder', 'type': Builder, 'doc': 'The builder of interest'}, - {'name': 'filepath', 'type': str, - 'doc': 'The path to the Zarr file or None for this file', 'default': None}) + @docval( + {"name": "builder", "type": Builder, "doc": "The builder of interest"}, + {"name": "filepath", "type": str, "doc": "The path to the Zarr file or None for this file", "default": None}, + ) def get_builder_disk_path(self, **kwargs): - builder, filepath = getargs('builder', 'filepath', kwargs) + builder, filepath = getargs("builder", "filepath", kwargs) basepath = filepath if filepath is not None else self.source builder_path = os.path.join(basepath, self.__get_path(builder).lstrip("/")) return builder_path @docval( - {'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder object representing the NWBFile'}, + {"name": "builder", "type": GroupBuilder, "doc": "the GroupBuilder object representing the NWBFile"}, { - 'name': 'link_data', - 'type': bool, - 'doc': 'If not specified otherwise link (True) or copy (False) Zarr Datasets', - 'default': True + "name": "link_data", + "type": bool, + "doc": "If not specified otherwise link (True) or copy (False) Zarr Datasets", + "default": True, }, { - 'name': 'exhaust_dci', - 'type': bool, - 'doc': ( - 'Exhaust DataChunkIterators one at a time. If False, add ' - 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end' + "name": "exhaust_dci", + "type": bool, + "doc": ( + "Exhaust DataChunkIterators one at a time. 
If False, add " + "them to the internal queue self.__dci_queue and exhaust them concurrently at the end" ), - 'default': True, + "default": True, }, { - 'name': 'export_source', - 'type': str, - 'doc': 'The source of the builders when exporting', - 'default': None, + "name": "export_source", + "type": str, + "doc": "The source of the builders when exporting", + "default": None, }, { "name": "consolidate_metadata", "type": bool, - "doc": ( - "Consolidate metadata into a single .zmetadata file in the root group to accelerate read." - ), + "doc": "Consolidate metadata into a single .zmetadata file in the root group to accelerate read.", "default": True, - } + }, ) def write_builder(self, **kwargs): """Write a builder to disk.""" f_builder, link_data, exhaust_dci, export_source, consolidate_metadata = getargs( - 'builder', 'link_data', 'exhaust_dci', 'export_source', 'consolidate_metadata', kwargs + "builder", "link_data", "exhaust_dci", "export_source", "consolidate_metadata", kwargs ) for name, gbldr in f_builder.groups.items(): self.write_group( @@ -501,8 +532,9 @@ def write_builder(self, **kwargs): self.write_attributes(self.__file, f_builder.attributes) # the same as set_attributes in HDMF self.__dci_queue.exhaust_queue() # Write any remaining DataChunkIterators that have been queued self._written_builders.set_written(f_builder) - self.logger.debug("Done writing %s '%s' to path '%s'" % - (f_builder.__class__.__qualname__, f_builder.name, self.source)) + self.logger.debug( + "Done writing %s '%s' to path '%s'" % (f_builder.__class__.__qualname__, f_builder.name, self.source) + ) # Consolidate metadata for the entire file after everything has been written if consolidate_metadata: @@ -522,44 +554,55 @@ def __get_store_path(store): return fpath - def __open_file_consolidated(self, - store, - mode, - synchronizer=None, - storage_options=None): + def __open_file_consolidated(self, store, mode, synchronizer=None, storage_options=None): """ This method will check to see if the metadata has been consolidated. If so, use open_consolidated. """ # This check is just a safeguard for possible errors in the future. But this should never happen - if mode == 'r-': - raise ValueError('Mode r- not allowed for reading with consolidated metadata') + if mode == "r-": + raise ValueError("Mode r- not allowed for reading with consolidated metadata") try: - return zarr.open_consolidated(store=store, - mode=mode, - synchronizer=synchronizer, - storage_options=storage_options) + return zarr.open_consolidated( + store=store, + mode=mode, + synchronizer=synchronizer, + storage_options=storage_options, + ) except KeyError: # A KeyError is raised when the '/.zmetadata' does not exist - return zarr.open(store=store, - mode=mode, - synchronizer=synchronizer, - storage_options=storage_options) - - @docval({'name': 'parent', 'type': Group, 'doc': 'the parent Zarr object'}, - {'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder to write'}, - {'name': 'link_data', 'type': bool, - 'doc': 'If not specified otherwise link (True) or copy (False) Zarr Datasets', 'default': True}, - {'name': 'exhaust_dci', 'type': bool, - 'doc': 'exhaust DataChunkIterators one at a time. 
If False, add ' + - 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', - 'default': True}, - {'name': 'export_source', 'type': str, - 'doc': 'The source of the builders when exporting', 'default': None}, - returns='the Group that was created', rtype='Group') + return zarr.open( + store=store, + mode=mode, + synchronizer=synchronizer, + storage_options=storage_options, + ) + + @docval( + {"name": "parent", "type": Group, "doc": "the parent Zarr object"}, + {"name": "builder", "type": GroupBuilder, "doc": "the GroupBuilder to write"}, + { + "name": "link_data", + "type": bool, + "doc": "If not specified otherwise link (True) or copy (False) Zarr Datasets", + "default": True, + }, + { + "name": "exhaust_dci", + "type": bool, + "doc": ( + "exhaust DataChunkIterators one at a time. If False, add " + "them to the internal queue self.__dci_queue and exhaust them concurrently at the end" + ), + "default": True, + }, + {"name": "export_source", "type": str, "doc": "The source of the builders when exporting", "default": None}, + returns="the Group that was created", + rtype="Group", + ) def write_group(self, **kwargs): """Write a GroupBuider to file""" parent, builder, link_data, exhaust_dci, export_source = getargs( - 'parent', 'builder', 'link_data', 'exhaust_dci', 'export_source', kwargs + "parent", "builder", "link_data", "exhaust_dci", "export_source", kwargs ) if self.get_written(builder): @@ -575,7 +618,7 @@ def write_group(self, **kwargs): builder=sub_builder, link_data=link_data, exhaust_dci=exhaust_dci, - export_source=export_source + export_source=export_source, ) datasets = builder.datasets @@ -600,13 +643,17 @@ def write_group(self, **kwargs): self._written_builders.set_written(builder) # record that the builder has been written return group - @docval({'name': 'obj', 'type': (Group, Array), 'doc': 'the Zarr object to add attributes to'}, - {'name': 'attributes', - 'type': dict, - 'doc': 'a dict containing the attributes on the Group or Dataset, indexed by attribute name'}) + @docval( + {"name": "obj", "type": (Group, Array), "doc": "the Zarr object to add attributes to"}, + { + "name": "attributes", + "type": dict, + "doc": "a dict containing the attributes on the Group or Dataset, indexed by attribute name", + }, + ) def write_attributes(self, **kwargs): """Set (i.e., write) the attributes on a given Zarr Group or Array.""" - obj, attributes = getargs('obj', 'attributes', kwargs) + obj, attributes = getargs("obj", "attributes", kwargs) for key, value in attributes.items(): # Case 1: list, set, tuple type attributes @@ -622,19 +669,24 @@ def write_attributes(self, **kwargs): # Numpy scalars and bytes are not JSON serializable. 
Try to convert to a serializable type instead except TypeError as e: try: - tmp = tuple([i.item() - if (isinstance(i, np.generic) and not isinstance(i, np.bytes_)) - else i.decode("utf-8") - if isinstance(i, (bytes, np.bytes_)) - else i - for i in value]) + # TODO: refactor this to be more readable + tmp = tuple( + [ + ( + i.item() + if (isinstance(i, np.generic) and not isinstance(i, np.bytes_)) + else i.decode("utf-8") if isinstance(i, (bytes, np.bytes_)) else i + ) + for i in value + ] + ) obj.attrs[key] = tmp except: # noqa: E722 raise TypeError(str(e) + " type=" + str(type(value)) + " data=" + str(value)) from e # Case 2: References elif isinstance(value, (Builder, ReferenceBuilder)): refs = self._create_ref(value, self.path) - tmp = {'zarr_dtype': 'object', 'value': refs} + tmp = {"zarr_dtype": "object", "value": refs} obj.attrs[key] = tmp # Case 3: Scalar attributes else: @@ -645,11 +697,12 @@ def write_attributes(self, **kwargs): except TypeError as e: try: val = value.item if isinstance(value, np.ndarray) else value - val = value.item() \ - if (isinstance(value, np.generic) and not isinstance(value, np.bytes_)) \ - else val.decode("utf-8") \ - if isinstance(value, (bytes, np.bytes_)) \ - else val + # TODO: refactor this to be more readable + val = ( + value.item() + if (isinstance(value, np.generic) and not isinstance(value, np.bytes_)) + else val.decode("utf-8") if isinstance(value, (bytes, np.bytes_)) else val + ) obj.attrs[key] = val except: # noqa: E722 msg = str(e) + "key=" + key + " type=" + str(type(value)) + " data=" + str(value) @@ -744,10 +797,10 @@ def resolve_ref(self, zarr_ref): 2) the target zarr object within the target file """ # Extract the path as defined in the zarr_ref object - if zarr_ref.get('source', None) is None: - source_file = str(zarr_ref['path']) + if zarr_ref.get("source", None) is None: + source_file = str(zarr_ref["path"]) else: - source_file = str(zarr_ref['source']) + source_file = str(zarr_ref["source"]) # Resolve the path relative to the current file if not self.is_remote(): source_file = os.path.abspath(source_file) @@ -757,15 +810,17 @@ def resolve_ref(self, zarr_ref): source_path = str(source_file).lstrip(".") source_file = root_path + source_path - object_path = zarr_ref.get('path', None) + object_path = zarr_ref.get("path", None) if object_path: target_name = os.path.basename(object_path) else: target_name = ROOT_NAME - target_zarr_obj = self.__open_file_consolidated(store=source_file, - mode='r', - storage_options=self.__storage_options) + target_zarr_obj = self.__open_file_consolidated( + store=source_file, + mode="r", + storage_options=self.__storage_options, + ) if object_path is not None: try: target_zarr_obj = target_zarr_obj[object_path] @@ -793,7 +848,7 @@ def _create_ref(self, ref_object, ref_link_source=None): path = self.__get_path(builder) # This is the internal path in the store to the item. # get the object id if available - object_id = builder.get('object_id', None) + object_id = builder.get("object_id", None) # determine the object_id of the source by following the parents of the builder until we find the root # the root builder should be the same as the source file containing the reference curr = builder @@ -801,7 +856,7 @@ def _create_ref(self, ref_object, ref_link_source=None): curr = curr.parent if curr: - source_object_id = curr.get('object_id', None) + source_object_id = curr.get("object_id", None) # We did not find ROOT_NAME as a parent. 
This should only happen if we have an invalid # file as a source, e.g., if during testing we use an arbitrary builder. We check this # anyways to avoid potential errors just in case @@ -814,9 +869,9 @@ def _create_ref(self, ref_object, ref_link_source=None): # between backends a user should always use export which takes care of creating a clean set of builders. if ref_link_source is None: # TODO: Refactor appending a dataset of references so this doesn't need to be called. - ref_link_source = (builder.source - if (builder.source is not None and os.path.isdir(builder.source)) - else self.source) + ref_link_source = ( + builder.source if (builder.source is not None and os.path.isdir(builder.source)) else self.source + ) if not isinstance(ref_link_source, str): # self.path is sometimes given as the ref_link_source. It can @@ -844,7 +899,8 @@ def _create_ref(self, ref_object, ref_link_source=None): source=rel_source, path=path, object_id=object_id, - source_object_id=source_object_id) + source_object_id=source_object_id, + ) return ref def __add_link__(self, parent, target_source, target_path, link_name): @@ -858,23 +914,25 @@ def __add_link__(self, parent, target_source, target_path, link_name): :param link_name: Name of the link :type link_name: str """ - if 'zarr_link' not in parent.attrs: - parent.attrs['zarr_link'] = [] - zarr_link = list(parent.attrs['zarr_link']) - if not isinstance(target_source, str): # a store + if "zarr_link" not in parent.attrs: + parent.attrs["zarr_link"] = [] + zarr_link = list(parent.attrs["zarr_link"]) + if not isinstance(target_source, str): # a store target_source = target_source.path - zarr_link.append({'source': target_source, 'path': target_path, 'name': link_name}) - parent.attrs['zarr_link'] = zarr_link + zarr_link.append({"source": target_source, "path": target_path, "name": link_name}) + parent.attrs["zarr_link"] = zarr_link - @docval({'name': 'parent', 'type': Group, 'doc': 'the parent Zarr object'}, - {'name': 'builder', 'type': LinkBuilder, 'doc': 'the LinkBuilder to write'}, - {'name': 'export_source', 'type': str, - 'doc': 'The source of the builders when exporting', 'default': None},) + @docval( + {"name": "parent", "type": Group, "doc": "the parent Zarr object"}, + {"name": "builder", "type": LinkBuilder, "doc": "the LinkBuilder to write"}, + {"name": "export_source", "type": str, "doc": "The source of the builders when exporting", "default": None}, + ) def write_link(self, **kwargs): - parent, builder, export_source = getargs('parent', 'builder', 'export_source', kwargs) + parent, builder, export_source = getargs("parent", "builder", "export_source", kwargs) if self.get_written(builder): - self.logger.debug("Skipping LinkBuilder '%s' already written to parent group '%s'" - % (builder.name, parent.name)) + self.logger.debug( + "Skipping LinkBuilder '%s' already written to parent group '%s'" % (builder.name, parent.name) + ) return self.logger.debug("Writing LinkBuilder '%s' to parent group '%s'" % (builder.name, parent.name)) @@ -925,49 +983,64 @@ def __setup_chunked_dataset__(cls, parent, name, data, options=None): """ io_settings = {} if options is not None: - if 'io_settings' in options: - io_settings = options.get('io_settings') + if "io_settings" in options: + io_settings = options.get("io_settings") # Define the chunking options if the user has not set them explicitly. We need chunking for the iterative write. 
- if 'chunks' not in io_settings: + if "chunks" not in io_settings: recommended_chunks = data.recommended_chunk_shape() - io_settings['chunks'] = True if recommended_chunks is None else recommended_chunks + io_settings["chunks"] = True if recommended_chunks is None else recommended_chunks # Define the shape of the data if not provided by the user - if 'shape' not in io_settings: - io_settings['shape'] = data.recommended_data_shape() - if 'dtype' not in io_settings: - if (options is not None) and ('dtype' in options): - io_settings['dtype'] = options['dtype'] + if "shape" not in io_settings: + io_settings["shape"] = data.recommended_data_shape() + if "dtype" not in io_settings: + if (options is not None) and ("dtype" in options): + io_settings["dtype"] = options["dtype"] else: - io_settings['dtype'] = data.dtype - if isinstance(io_settings['dtype'], str): + io_settings["dtype"] = data.dtype + if isinstance(io_settings["dtype"], str): # map to real dtype if we were given a string - io_settings['dtype'] = cls.__dtypes.get(io_settings['dtype']) + io_settings["dtype"] = cls.__dtypes.get(io_settings["dtype"]) try: dset = parent.create_dataset(name, **io_settings) - dset.attrs['zarr_dtype'] = np.dtype(io_settings['dtype']).str + dset.attrs["zarr_dtype"] = np.dtype(io_settings["dtype"]).str except Exception as exc: raise Exception("Could not create dataset %s in %s" % (name, parent.name)) from exc return dset - @docval({'name': 'parent', 'type': Group, 'doc': 'the parent Zarr object'}, # noqa: C901 - {'name': 'builder', 'type': DatasetBuilder, 'doc': 'the DatasetBuilder to write'}, - {'name': 'link_data', 'type': bool, - 'doc': 'If not specified otherwise link (True) or copy (False) Zarr Datasets', 'default': True}, - {'name': 'exhaust_dci', 'type': bool, - 'doc': 'exhaust DataChunkIterators one at a time. If False, add ' + - 'them to the internal queue self.__dci_queue and exhaust them concurrently at the end', - 'default': True}, - {'name': 'force_data', 'type': None, - 'doc': 'Used internally to force the data being used when we have to load the data', 'default': None}, - {'name': 'export_source', 'type': str, - 'doc': 'The source of the builders when exporting', 'default': None}, - returns='the Zarr array that was created', rtype=Array) + @docval( + {"name": "parent", "type": Group, "doc": "the parent Zarr object"}, # noqa: C901 + {"name": "builder", "type": DatasetBuilder, "doc": "the DatasetBuilder to write"}, + { + "name": "link_data", + "type": bool, + "doc": "If not specified otherwise link (True) or copy (False) Zarr Datasets", + "default": True, + }, + { + "name": "exhaust_dci", + "type": bool, + "doc": ( + "exhaust DataChunkIterators one at a time. 
If False, add " + "them to the internal queue self.__dci_queue and exhaust them concurrently at the end" + ), + "default": True, + }, + { + "name": "force_data", + "type": None, + "doc": "Used internally to force the data being used when we have to load the data", + "default": None, + }, + {"name": "export_source", "type": str, "doc": "The source of the builders when exporting", "default": None}, + returns="the Zarr array that was created", + rtype=Array, + ) def write_dataset(self, **kwargs): # noqa: C901 parent, builder, link_data, exhaust_dci, export_source = getargs( - 'parent', 'builder', 'link_data', 'exhaust_dci', 'export_source', kwargs + "parent", "builder", "link_data", "exhaust_dci", "export_source", kwargs ) - force_data = getargs('force_data', kwargs) + force_data = getargs("force_data", kwargs) if exhaust_dci and self.__dci_queue is None: self.__dci_queue = ZarrIODataChunkIteratorQueue() @@ -983,17 +1056,17 @@ def write_dataset(self, **kwargs): # noqa: C901 data = ZarrDataIO.from_h5py_dataset(h5dataset=data) # Separate data values and io_settings for write if isinstance(data, ZarrDataIO): - options['io_settings'] = data.io_settings + options["io_settings"] = data.io_settings link_data = data.link_data data = data.data else: - options['io_settings'] = {} + options["io_settings"] = {} if builder.dimension_labels is not None: - builder.attributes['_ARRAY_DIMENSIONS'] = builder.dimension_labels + builder.attributes["_ARRAY_DIMENSIONS"] = builder.dimension_labels attributes = builder.attributes - options['dtype'] = builder.dtype + options["dtype"] = builder.dtype linked = False @@ -1003,12 +1076,12 @@ def write_dataset(self, **kwargs): # noqa: C901 # copy the dataset data_filename = self.__get_store_path(data.store) if link_data: - if export_source is None: # not exporting + if export_source is None: # not exporting self.__add_link__(parent, data_filename, data.name, name) linked = True dset = None - else: # exporting - data_parent = '/'.join(data.name.split('/')[:-1]) + else: # exporting + data_parent = "/".join(data.name.split("/")[:-1]) # Case 1: The dataset is NOT in the export source, create a link to preserve the external link. # I have three files, FileA, FileB, FileC. I want to export FileA to FileB. FileA has an # EXTERNAL link to a dataset in Filec. This case preserves the link to FileC to also be in FileB. 
@@ -1040,14 +1113,16 @@ def write_dataset(self, **kwargs): # noqa: C901 # If we have a dataset of containers we need to make the references to the containers if len(data) > 0 and isinstance(data[0], Container): ref_data = [self._create_ref(data[i], ref_link_source=self.path) for i in range(len(data))] - shape = (len(data), ) - type_str = 'object' - dset = parent.require_dataset(name, - shape=shape, - dtype=object, - object_codec=self.__codec_cls(), - **options['io_settings']) - dset.attrs['zarr_dtype'] = type_str + shape = (len(data),) + type_str = "object" + dset = parent.require_dataset( + name, + shape=shape, + dtype=object, + object_codec=self.__codec_cls(), + **options["io_settings"], + ) + dset.attrs["zarr_dtype"] = type_str dset[:] = ref_data self._written_builders.set_written(builder) # record that the builder has been written # If we have a regular dataset, then load the data and write the builder after load @@ -1059,22 +1134,26 @@ def write_dataset(self, **kwargs): # noqa: C901 # We can/should not update the data in the builder itself so we load the data here and instead # force write_dataset when we call it recursively to use the data we loaded, rather than the # dataset that is set on the builder - dset = self.write_dataset(parent=parent, - builder=builder, - link_data=link_data, - force_data=data[:], - export_source=export_source) + dset = self.write_dataset( + parent=parent, + builder=builder, + link_data=link_data, + force_data=data[:], + export_source=export_source, + ) self._written_builders.set_written(builder) # record that the builder has been written # Write a compound dataset - elif isinstance(options['dtype'], list): + elif isinstance(options["dtype"], list): refs = list() type_str = list() - for i, dts in enumerate(options['dtype']): - if self.__is_ref(dts['dtype']): + for i, dts in enumerate(options["dtype"]): + if self.__is_ref(dts["dtype"]): refs.append(i) - type_str.append({'name': dts['name'], 'dtype': 'object'}) + type_str.append({"name": dts["name"], "dtype": "object"}) else: - i = list([dts, ]) + i = [ + dts, + ] t = self.__resolve_dtype_helper__(i) type_str.append(self.__serial_dtype__(t)[0]) @@ -1097,19 +1176,24 @@ def write_dataset(self, **kwargs): # noqa: C901 # dtype = self.__resolve_dtype_helper__(options['dtype']) new_dtype = [] - for field in options['dtype']: - if field['dtype'] is str or field['dtype'] in ( - 'str', 'text', 'utf', 'utf8', 'utf-8', 'isodatetime' + for field in options["dtype"]: + if field["dtype"] is str or field["dtype"] in ( + "str", + "text", + "utf", + "utf8", + "utf-8", + "isodatetime", ): # Zarr does not support variable length strings - new_dtype.append((field['name'], 'O')) - elif isinstance(field['dtype'], dict): + new_dtype.append((field["name"], "O")) + elif isinstance(field["dtype"], dict): # eg. for some references, dtype will be of the form # {'target_type': 'Baz', 'reftype': 'object'} # which should just get serialized as an object - new_dtype.append((field['name'], 'O')) + new_dtype.append((field["name"], "O")) else: - new_dtype.append((field['name'], self.__resolve_dtype_helper__(field['dtype']))) + new_dtype.append((field["name"], self.__resolve_dtype_helper__(field["dtype"]))) dtype = np.dtype(new_dtype) # cast and store compound dataset @@ -1119,34 +1203,36 @@ def write_dataset(self, **kwargs): # noqa: C901 shape=(len(arr),), dtype=dtype, object_codec=self.__codec_cls(), - **options['io_settings'] + **options["io_settings"], ) - dset.attrs['zarr_dtype'] = type_str + dset.attrs["zarr_dtype"] = type_str dset[...] 
= arr else: # write a compound datatype dset = self.__list_fill__(parent, name, data, options) # Write a dataset of references - elif self.__is_ref(options['dtype']): + elif self.__is_ref(options["dtype"]): # Note: ref_link_source is set to self.path because we do not do external references # We only support external links. if isinstance(data, ReferenceBuilder): shape = (1,) - type_str = 'object' + type_str = "object" refs = self._create_ref(data, ref_link_source=self.path) else: - shape = (len(data), ) - type_str = 'object' + shape = (len(data),) + type_str = "object" refs = [self._create_ref(item, ref_link_source=self.path) for item in data] - dset = parent.require_dataset(name, - shape=shape, - dtype=object, - object_codec=self.__codec_cls(), - **options['io_settings']) + dset = parent.require_dataset( + name, + shape=shape, + dtype=object, + object_codec=self.__codec_cls(), + **options["io_settings"], + ) self._written_builders.set_written(builder) # record that the builder has been written - dset.attrs['zarr_dtype'] = type_str - if hasattr(refs, '__len__'): + dset.attrs["zarr_dtype"] = type_str + if hasattr(refs, "__len__"): dset[:] = np.array(refs) else: dset[0] = refs @@ -1158,7 +1244,7 @@ def write_dataset(self, **kwargs): # noqa: C901 elif isinstance(data, AbstractDataChunkIterator): dset = self.__setup_chunked_dataset__(parent, name, data, options) self.__dci_queue.append(dataset=dset, data=data) - elif hasattr(data, '__len__'): + elif hasattr(data, "__len__"): dset = self.__list_fill__(parent, name, data, options) else: dset = self.__scalar_fill__(parent, name, data, options) @@ -1191,7 +1277,7 @@ def write_dataset(self, **kwargs): # noqa: C901 "utf8": str, "utf-8": str, "ascii": bytes, - "bytes": bytes, + "bytes": bytes, "str": str, "isodatetime": str, "string_": bytes, @@ -1214,13 +1300,13 @@ def __serial_dtype__(cls, dtype): ret = list() for n in dtype.names: item = dict() - item['name'] = n - item['dtype'] = cls.__serial_dtype__(dtype[n]) + item["name"] = n + item["dtype"] = cls.__serial_dtype__(dtype[n]) ret.append(item) return ret # TODO Does not work when Reference in compound datatype elif dtype == ZarrReference: - return 'object' + return "object" @classmethod def __resolve_dtype__(cls, dtype, data): @@ -1238,11 +1324,11 @@ def __resolve_dtype_helper__(cls, dtype): elif isinstance(dtype, str): return cls.__dtypes.get(dtype) elif isinstance(dtype, dict): - return cls.__dtypes.get(dtype['reftype']) + return cls.__dtypes.get(dtype["reftype"]) elif isinstance(dtype, list): - return np.dtype([(x['name'], cls.__resolve_dtype_helper__(x['dtype'])) for x in dtype]) + return np.dtype([(x["name"], cls.__resolve_dtype_helper__(x["dtype"])) for x in dtype]) else: - raise ValueError(f'Cant resolve dtype {dtype}') + raise ValueError(f"Cant resolve dtype {dtype}") @classmethod def get_type(cls, data): @@ -1250,50 +1336,50 @@ def get_type(cls, data): return cls.__dtypes.get("str") elif isinstance(data, bytes): return cls.__dtypes.get("bytes") - elif not hasattr(data, '__len__'): + elif not hasattr(data, "__len__"): return type(data) else: if len(data) == 0: - raise ValueError('cannot determine type for empty data') + raise ValueError("cannot determine type for empty data") return cls.get_type(data[0]) - __reserve_attribute = ('zarr_dtype', 'zarr_link') + __reserve_attribute = ("zarr_dtype", "zarr_link") def __list_fill__(self, parent, name, data, options=None): # noqa: C901 dtype = None io_settings = dict() if options is not None: - dtype = options.get('dtype') - if 
options.get('io_settings') is not None: - io_settings = options.get('io_settings') + dtype = options.get("dtype") + if options.get("io_settings") is not None: + io_settings = options.get("io_settings") # Determine the dtype if not isinstance(dtype, type): try: dtype = self.__resolve_dtype__(dtype, data) except Exception as exc: - msg = 'cannot add %s to %s - could not determine type' % (name, parent.name) # noqa: F821 + msg = "cannot add %s to %s - could not determine type" % (name, parent.name) # noqa: F821 raise Exception(msg) from exc # Set the type_str type_str = self.__serial_dtype__(dtype) # Determine the shape and update the dtype if necessary when dtype==object - if 'shape' in io_settings: # Use the shape set by the user - data_shape = io_settings.pop('shape') + if "shape" in io_settings: # Use the shape set by the user + data_shape = io_settings.pop("shape") # If we have a numeric numpy-like array (e.g., numpy.array or h5py.Dataset) then use its shape elif isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.number) or dtype == np.bool_: # HDMF's get_data_shape may return the maxshape of an HDF5 dataset which can include None values # which Zarr does not allow for dataset shape. Check for the shape attribute first before falling # back on get_data_shape - if hasattr(data, 'shape') and data.shape is not None: + if hasattr(data, "shape") and data.shape is not None: data_shape = data.shape # This is a fall-back just in case. However this should not happen for standard numpy and h5py arrays - else: # pragma: no cover - data_shape = get_data_shape(data) # pragma: no cover + else: # pragma: no cover + data_shape = get_data_shape(data) # pragma: no cover # Deal with object dtype elif isinstance(dtype, np.dtype): data = data[:] # load the data in case we come from HDF5 or another on-disk data source we don't know - data_shape = (len(data), ) + data_shape = (len(data),) # if we have a compound data type if dtype.names: data_shape = get_data_shape(data) @@ -1302,7 +1388,7 @@ def __list_fill__(self, parent, name, data, options=None): # noqa: C901 for substype in dtype.fields.items(): if np.issubdtype(substype[1][0], np.flexible) or np.issubdtype(substype[1][0], np.object_): dtype = object - io_settings['object_codec'] = self.__codec_cls() + io_settings["object_codec"] = self.__codec_cls() break # sometimes bytes and strings can hide as object in numpy array so lets try # to write those as strings and bytes rather than as objects @@ -1316,17 +1402,17 @@ def __list_fill__(self, parent, name, data, options=None): # noqa: C901 # Set encoding for objects else: dtype = object - io_settings['object_codec'] = self.__codec_cls() + io_settings["object_codec"] = self.__codec_cls() # Determine the shape from the data if all other cases have not been hit else: data_shape = get_data_shape(data) # Create the dataset dset = parent.require_dataset(name, shape=data_shape, dtype=dtype, **io_settings) - dset.attrs['zarr_dtype'] = type_str + dset.attrs["zarr_dtype"] = type_str # Write the data to file - if dtype == object: # noqa: E721 + if dtype == object: # noqa: E721 for c in np.ndindex(data_shape): o = data for i in c: @@ -1344,7 +1430,7 @@ def __list_fill__(self, parent, name, data, options=None): # noqa: C901 except ValueError: for i in range(len(data)): dset[i] = data[i] - except TypeError: # If data is an h5py.Dataset with strings, they may need to be decoded + except TypeError: # If data is an h5py.Dataset with strings, they may need to be decoded for c in np.ndindex(data_shape): o = data for i in c: 
@@ -1357,26 +1443,26 @@ def __scalar_fill__(self, parent, name, data, options=None): dtype = None io_settings = dict() if options is not None: - dtype = options.get('dtype') - io_settings = options.get('io_settings') + dtype = options.get("dtype") + io_settings = options.get("io_settings") if io_settings is None: io_settings = dict() if not isinstance(dtype, type): try: dtype = self.__resolve_dtype__(dtype, data) except Exception as exc: - msg = 'cannot add %s to %s - could not determine type' % (name, parent.name) + msg = "cannot add %s to %s - could not determine type" % (name, parent.name) raise Exception(msg) from exc - if dtype == object: # noqa: E721 - io_settings['object_codec'] = self.__codec_cls() + if dtype == object: # noqa: E721 + io_settings["object_codec"] = self.__codec_cls() - dset = parent.require_dataset(name, shape=(1, ), dtype=dtype, **io_settings) + dset = parent.require_dataset(name, shape=(1,), dtype=dtype, **io_settings) dset[:] = data - type_str = 'scalar' - dset.attrs['zarr_dtype'] = type_str + type_str = "scalar" + dset.attrs["zarr_dtype"] = type_str return dset - @docval(returns='a GroupBuilder representing the NWB Dataset', rtype='GroupBuilder') + @docval(returns="a GroupBuilder representing the NWB Dataset", rtype="GroupBuilder") def read_builder(self): f_builder = self.__read_group(self.__file, ROOT_NAME) return f_builder @@ -1387,31 +1473,37 @@ def __set_built(self, zarr_obj, builder): path = os.path.join(fpath, path) self.__built.setdefault(path, builder) - @docval({'name': 'zarr_obj', 'type': (Array, Group), - 'doc': 'the Zarr object to the corresponding Container/Data object for'}) + @docval( + { + "name": "zarr_obj", + "type": (Array, Group), + "doc": "the Zarr object to the corresponding Container/Data object for", + } + ) def get_container(self, **kwargs): """ Get the container for the corresponding Zarr Group or Dataset :raises ValueError: When no builder has been constructed yet for the given h5py object """ - zarr_obj = getargs('zarr_obj', kwargs) + zarr_obj = getargs("zarr_obj", kwargs) builder = self.get_builder(zarr_obj) container = self.manager.construct(builder) return container # TODO: This method should be moved to HDMFIO - @docval({'name': 'zarr_obj', 'type': (Array, Group), - 'doc': 'the Zarr object to the corresponding Builder object for'}) + @docval( + {"name": "zarr_obj", "type": (Array, Group), "doc": "the Zarr object to the corresponding Builder object for"} + ) def get_builder(self, **kwargs): # TODO: move this to HDMFIO (define skeleton in there at least) """ Get the builder for the corresponding Group or Dataset :raises ValueError: When no builder has been constructed """ - zarr_obj = kwargs['zarr_obj'] + zarr_obj = kwargs["zarr_obj"] builder = self.__get_built(zarr_obj) if builder is None: - msg = '%s has not been built' % (zarr_obj.name) + msg = "%s has not been built" % (zarr_obj.name) raise ValueError(msg) return builder @@ -1474,10 +1566,10 @@ def __read_links(self, zarr_obj, parent): :type parent: GroupBuilder """ # read links - if 'zarr_link' in zarr_obj.attrs: - links = zarr_obj.attrs['zarr_link'] + if "zarr_link" in zarr_obj.attrs: + links = zarr_obj.attrs["zarr_link"] for link in links: - link_name = link['name'] + link_name = link["name"] target_name, target_zarr_obj = self.resolve_ref(link) # NOTE: __read_group and __read_dataset return the cached builders if the target has already been built if isinstance(target_zarr_obj, Group): @@ -1494,9 +1586,9 @@ def __read_dataset(self, zarr_obj, name): if ret is not None: return 
ret - if 'zarr_dtype' in zarr_obj.attrs: - zarr_dtype = zarr_obj.attrs['zarr_dtype'] - elif hasattr(zarr_obj, 'dtype'): # Fallback for invalid files that are missing zarr_type + if "zarr_dtype" in zarr_obj.attrs: + zarr_dtype = zarr_obj.attrs["zarr_dtype"] + elif hasattr(zarr_obj, "dtype"): # Fallback for invalid files that are missing zarr_type zarr_dtype = zarr_obj.dtype warnings.warn( "Inferred dtype from zarr type. Dataset missing zarr_dtype: " + str(name) + " " + str(zarr_obj) @@ -1509,35 +1601,37 @@ def __read_dataset(self, zarr_obj, name): else: source = zarr_obj.store.path - kwargs = {"attributes": self.__read_attrs(zarr_obj), - "dtype": zarr_dtype, - "maxshape": zarr_obj.shape, - "chunks": not (zarr_obj.shape == zarr_obj.chunks), - "source": source} - dtype = kwargs['dtype'] + kwargs = { + "attributes": self.__read_attrs(zarr_obj), + "dtype": zarr_dtype, + "maxshape": zarr_obj.shape, + "chunks": not (zarr_obj.shape == zarr_obj.chunks), + "source": source, + } + dtype = kwargs["dtype"] # By default, use the zarr.core.Array as data for lazy data load data = zarr_obj # Read scalar dataset - if dtype == 'scalar': + if dtype == "scalar": data = zarr_obj[()] if isinstance(dtype, list): # Check compound dataset where one of the subsets contains references has_reference = False for i, dts in enumerate(dtype): - if dts['dtype'] == 'object': # check items for object reference + if dts["dtype"] == "object": # check items for object reference has_reference = True break - retrieved_dtypes = [dtype_dict['dtype'] for dtype_dict in dtype] + retrieved_dtypes = [dtype_dict["dtype"] for dtype_dict in dtype] if has_reference: data = BuilderZarrTableDataset(zarr_obj, self, retrieved_dtypes) elif self.__is_ref(dtype): # Array of references data = BuilderZarrReferenceDataset(data, self) - kwargs['data'] = data + kwargs["data"] = data if name is None: name = str(os.path.basename(zarr_obj.name)) ret = DatasetBuilder(name, **kwargs) # create builder object for dataset @@ -1551,9 +1645,9 @@ def __read_attrs(self, zarr_obj): for k in zarr_obj.attrs.keys(): if k not in self.__reserve_attribute: v = zarr_obj.attrs[k] - if isinstance(v, dict) and 'zarr_dtype' in v: - if v['zarr_dtype'] == 'object': - target_name, target_zarr_obj = self.resolve_ref(v['value']) + if isinstance(v, dict) and "zarr_dtype" in v: + if v["zarr_dtype"] == "object": + target_name, target_zarr_obj = self.resolve_ref(v["value"]) if isinstance(target_zarr_obj, zarr.hierarchy.Group): ret[k] = self.__read_group(target_zarr_obj, target_name) else: diff --git a/src/hdmf_zarr/nwb.py b/src/hdmf_zarr/nwb.py index 2b4d3bfb..97a62573 100644 --- a/src/hdmf_zarr/nwb.py +++ b/src/hdmf_zarr/nwb.py @@ -1,13 +1,11 @@ """Module with Zarr backend for NWB for integration with PyNWB""" + from pathlib import Path from .backend import ZarrIO, SUPPORTED_ZARR_STORES -from hdmf.utils import (docval, - popargs, - get_docval) +from hdmf.utils import docval, popargs, get_docval from hdmf.backends.io import HDMFIO -from hdmf.build import (BuildManager, - TypeMap) +from hdmf.build import BuildManager, TypeMap from pynwb import get_manager, get_type_map @@ -19,25 +17,34 @@ class NWBZarrIO(ZarrIO): is to perform default setup for BuildManager, loading or namespaces etc., in the context of the NWB format. 
""" - @docval(*get_docval(ZarrIO.__init__), - {'name': 'load_namespaces', 'type': bool, - 'doc': 'whether or not to load cached namespaces from given path - not applicable in write mode', - 'default': True}, - {'name': 'extensions', 'type': (str, TypeMap, list), - 'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps', - 'default': None}) + + @docval( + *get_docval(ZarrIO.__init__), + { + "name": "load_namespaces", + "type": bool, + "doc": "whether or not to load cached namespaces from given path - not applicable in write mode", + "default": True, + }, + { + "name": "extensions", + "type": (str, TypeMap, list), + "doc": "a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps", + "default": None, + }, + ) def __init__(self, **kwargs): - path, mode, manager, extensions, load_namespaces, synchronizer, storage_options = \ - popargs('path', 'mode', 'manager', 'extensions', - 'load_namespaces', 'synchronizer', 'storage_options', kwargs) + path, mode, manager, extensions, load_namespaces, synchronizer, storage_options = popargs( + "path", "mode", "manager", "extensions", "load_namespaces", "synchronizer", "storage_options", kwargs + ) - io_modes_that_create_file = ['w', 'w-', 'x'] + io_modes_that_create_file = ["w", "w-", "x"] if mode in io_modes_that_create_file or manager is not None or extensions is not None: load_namespaces = False if load_namespaces: tm = get_type_map() - super(NWBZarrIO, self).load_namespaces(tm, path, storage_options) + super().load_namespaces(tm, path, storage_options) manager = BuildManager(tm) else: if manager is not None and extensions is not None: @@ -46,34 +53,38 @@ def __init__(self, **kwargs): manager = get_manager(extensions=extensions) elif manager is None: manager = get_manager() - super(NWBZarrIO, self).__init__(path, - manager=manager, - mode=mode, - synchronizer=synchronizer, - storage_options=storage_options) + super().__init__(path, manager=manager, mode=mode, synchronizer=synchronizer, storage_options=storage_options) - @docval({'name': 'src_io', 'type': HDMFIO, 'doc': 'the HDMFIO object for reading the data to export'}, - {'name': 'nwbfile', 'type': 'NWBFile', - 'doc': 'the NWBFile object to export. If None, then the entire contents of src_io will be exported', - 'default': None}, - {'name': 'write_args', 'type': dict, 'doc': 'arguments to pass to :py:meth:`write_builder`', - 'default': dict()}) + @docval( + {"name": "src_io", "type": HDMFIO, "doc": "the HDMFIO object for reading the data to export"}, + { + "name": "nwbfile", + "type": "NWBFile", + "doc": "the NWBFile object to export. 
If None, then the entire contents of src_io will be exported", + "default": None, + }, + {"name": "write_args", "type": dict, "doc": "arguments to pass to :py:meth:`write_builder`", "default": dict()}, + ) def export(self, **kwargs): - nwbfile = popargs('nwbfile', kwargs) - kwargs['container'] = nwbfile + nwbfile = popargs("nwbfile", kwargs) + kwargs["container"] = nwbfile super().export(**kwargs) @staticmethod - @docval({'name': 'path', - 'type': (str, Path, *SUPPORTED_ZARR_STORES), - 'doc': 'the path to the Zarr file or a supported Zarr store'}, - is_method=False) + @docval( + { + "name": "path", + "type": (str, Path, *SUPPORTED_ZARR_STORES), + "doc": "the path to the Zarr file or a supported Zarr store", + }, + is_method=False, + ) def read_nwb(**kwargs): """ Helper factory method for reading an NWB file and return the NWBFile object """ # Retrieve the filepath - path = popargs('path', kwargs) + path = popargs("path", kwargs) if isinstance(path, Path): path = str(path) # determine default storage options to use when opening a file from S3 diff --git a/src/hdmf_zarr/utils.py b/src/hdmf_zarr/utils.py index 1c012a22..e51b137b 100644 --- a/src/hdmf_zarr/utils.py +++ b/src/hdmf_zarr/utils.py @@ -1,4 +1,5 @@ """Collection of utility I/O classes for the ZarrIO backend store.""" + import gc import traceback import multiprocessing @@ -47,13 +48,14 @@ class ZarrIODataChunkIteratorQueue(deque): Note that "fork" is only available on UNIX systems (not Windows). :type multiprocessing_context: string or None """ + def __init__( self, number_of_jobs: int = 1, max_threads_per_process: Union[None, int] = None, multiprocessing_context: Union[None, Literal["fork", "spawn"]] = None, ): - self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) + self.logger = logging.getLogger("%s.%s" % (self.__class__.__module__, self.__class__.__qualname__)) self.number_of_jobs = number_of_jobs self.max_threads_per_process = max_threads_per_process @@ -118,8 +120,7 @@ def exhaust_queue(self): display_progress = False r_bar_in_MB = ( - "| {n_fmt}/{total_fmt} MB [Elapsed: {elapsed}, " - "Remaining: {remaining}, Rate:{rate_fmt}{postfix}]" + "| {n_fmt}/{total_fmt} MB [Elapsed: {elapsed}, Remaining: {remaining}, Rate:{rate_fmt}{postfix}]" ) bar_format = "{l_bar}{bar}" + f"{r_bar_in_MB}" progress_bar_options = dict( @@ -128,7 +129,7 @@ def exhaust_queue(self): bar_format=bar_format, unit="MB", ) - for (zarr_dataset, iterator) in iter(self): + for zarr_dataset, iterator in iter(self): # Parallel write only works well with GenericDataChunkIterators # Due to perfect alignment between chunks and buffers if not isinstance(iterator, GenericDataChunkIterator): @@ -149,7 +150,8 @@ def exhaust_queue(self): display_progress = display_progress or iterator.display_progress iterator.display_progress = False per_iterator_progress_options = { - key: value for key, value in iterator.progress_bar_options.items() + key: value + for key, value in iterator.progress_bar_options.items() if key not in ["desc", "total", "file"] } progress_bar_options.update(**per_iterator_progress_options) @@ -158,9 +160,9 @@ def exhaust_queue(self): for buffer_selection in iterator.buffer_selection_generator: buffer_map_args = (zarr_dataset.store.path, zarr_dataset.path, iterator, buffer_selection) buffer_map.append(buffer_map_args) - buffer_size_in_MB = math.prod( - [slice_.stop - slice_.start for slice_ in buffer_selection] - ) * iterator_itemsize / 1e6 + buffer_size_in_MB = ( + math.prod([slice_.stop - slice_.start for 
slice_ in buffer_selection]) * iterator_itemsize / 1e6 + ) size_in_MB_per_iteration.append(buffer_size_in_MB) progress_bar_options.update( total=int(sum(size_in_MB_per_iteration)), # int() to round down to nearest integer for better display @@ -168,7 +170,7 @@ def exhaust_queue(self): if parallelizable_iterators: # Avoid spinning up ProcessPool if no candidates during this exhaustion # Remove candidates for parallelization from the queue - for (zarr_dataset, iterator) in parallelizable_iterators: + for zarr_dataset, iterator in parallelizable_iterators: self.remove((zarr_dataset, iterator)) operation_to_run = self._write_buffer_zarr @@ -182,7 +184,7 @@ def exhaust_queue(self): operation_to_run, process_initialization, initialization_arguments, - self.max_threads_per_process + self.max_threads_per_process, ), ) as executor: results = executor.map(self.function_wrapper, buffer_map) @@ -263,7 +265,7 @@ def initializer_wrapper( operation_to_run: callable, process_initialization: callable, initialization_arguments: Iterable, # TODO: eventually standardize with typing.Iterable[typing.Any] - max_threads_per_process: Optional[int] = None + max_threads_per_process: Optional[int] = None, ): # keyword arguments here are just for readability, ProcessPool only takes a tuple """ Needed as a part of a bug fix with cloud memory leaks discovered by SpikeInterface team. @@ -320,7 +322,7 @@ def function_wrapper(args: Tuple[str, str, AbstractDataChunkIterator, Tuple[slic zarr_store_path, relative_dataset_path, iterator, - buffer_selection + buffer_selection, ) else: with threadpool_limits(limits=max_threads_per_process): @@ -338,25 +340,27 @@ class ZarrSpecWriter(SpecWriter): Class used to write format specs to Zarr """ - @docval({'name': 'group', 'type': Group, 'doc': 'the Zarr file to write specs to'}) + @docval({"name": "group", "type": Group, "doc": "the Zarr file to write specs to"}) def __init__(self, **kwargs): - self.__group = getargs('group', kwargs) + self.__group = getargs("group", kwargs) @staticmethod def stringify(spec): """ Converts a spec into a JSON string to write to a dataset """ - return json.dumps(spec, separators=(',', ':')) + return json.dumps(spec, separators=(",", ":")) def __write(self, d, name): data = self.stringify(d) - dset = self.__group.require_dataset(name, - shape=(1, ), - dtype=object, - object_codec=numcodecs.JSON(), - compressor=None) - dset.attrs['zarr_dtype'] = 'scalar' + dset = self.__group.require_dataset( + name, + shape=(1,), + dtype=object, + object_codec=numcodecs.JSON(), + compressor=None, + ) + dset.attrs["zarr_dtype"] = "scalar" dset[0] = data return dset @@ -366,7 +370,7 @@ def write_spec(self, spec, path): def write_namespace(self, namespace, path): """Write a namespace to the given path""" - return self.__write({'namespaces': [namespace]}, path) + return self.__write({"namespaces": [namespace]}, path) class ZarrSpecReader(SpecReader): @@ -374,9 +378,9 @@ class ZarrSpecReader(SpecReader): Class to read format specs from Zarr """ - @docval({'name': 'group', 'type': Group, 'doc': 'the Zarr file to read specs from'}) + @docval({"name": "group", "type": Group, "doc": "the Zarr file to read specs from"}) def __init__(self, **kwargs): - self.__group = getargs('group', kwargs) + self.__group = getargs("group", kwargs) source = "%s:%s" % (os.path.abspath(self.__group.store.path), self.__group.name) super().__init__(source=source) self.__cache = None @@ -394,7 +398,7 @@ def read_namespace(self, ns_path): """Read a namespace from the given path""" if self.__cache is 
None: self.__cache = self.__read(ns_path) - ret = self.__cache['namespaces'] + ret = self.__cache["namespaces"] return ret @@ -404,63 +408,81 @@ class ZarrDataIO(DataIO): for data arrays. """ - @docval({'name': 'data', - 'type': (np.ndarray, list, tuple, zarr.Array, Iterable), - 'doc': 'the data to be written. NOTE: If an zarr.Array is used, all other settings but link_data' + - ' will be ignored as the dataset will either be linked to or copied as is in ZarrIO.'}, - {'name': 'chunks', - 'type': (list, tuple), - 'doc': 'Chunk shape', - 'default': None}, - {'name': 'fillvalue', - 'type': None, - 'doc': 'Value to be returned when reading uninitialized parts of the dataset', - 'default': None}, - {'name': 'compressor', - 'type': (numcodecs.abc.Codec, bool), - 'doc': 'Zarr compressor filter to be used. Set to True to use Zarr default.' - 'Set to False to disable compression)', - 'default': None}, - {'name': 'filters', - 'type': (list, tuple), - 'doc': 'One or more Zarr-supported codecs used to transform data prior to compression.', - 'default': None}, - {'name': 'link_data', - 'type': bool, - 'doc': 'If data is an zarr.Array should it be linked to or copied. NOTE: This parameter is only ' + - 'allowed if data is an zarr.Array', - 'default': False} - ) + @docval( + { + "name": "data", + "type": (np.ndarray, list, tuple, zarr.Array, Iterable), + "doc": ( + "the data to be written. NOTE: If an zarr.Array is used, all other settings but link_data " + "will be ignored as the dataset will either be linked to or copied as is in ZarrIO." + ), + }, + { + "name": "chunks", + "type": (list, tuple), + "doc": "Chunk shape", + "default": None, + }, + { + "name": "fillvalue", + "type": None, + "doc": "Value to be returned when reading uninitialized parts of the dataset", + "default": None, + }, + { + "name": "compressor", + "type": (numcodecs.abc.Codec, bool), + "doc": ( + "Zarr compressor filter to be used. Set to True to use Zarr default. " + "Set to False to disable compression)" + ), + "default": None, + }, + { + "name": "filters", + "type": (list, tuple), + "doc": "One or more Zarr-supported codecs used to transform data prior to compression.", + "default": None, + }, + { + "name": "link_data", + "type": bool, + "doc": ( + "If data is an zarr.Array should it be linked to or copied. NOTE: This parameter is only " + "allowed if data is an zarr.Array" + ), + "default": False, + }, + ) def __init__(self, **kwargs): # TODO Need to add error checks and warnings to ZarrDataIO to check for parameter collisions and add tests data, chunks, fill_value, compressor, filters, self.__link_data = getargs( - 'data', 'chunks', 'fillvalue', 'compressor', 'filters', 'link_data', kwargs) + "data", "chunks", "fillvalue", "compressor", "filters", "link_data", kwargs + ) # NOTE: dtype and shape of the DataIO base class are not yet supported by ZarrDataIO. # These parameters are used to create empty data to allocate the data but # leave the I/O to fill the data to the user. 
- super(ZarrDataIO, self).__init__(data=data, - dtype=None, - shape=None) + super().__init__(data=data, dtype=None, shape=None) if not isinstance(data, zarr.Array) and self.__link_data: self.__link_data = False self.__iosettings = dict() if chunks is not None: - self.__iosettings['chunks'] = chunks + self.__iosettings["chunks"] = chunks if fill_value is not None: - self.__iosettings['fill_value'] = fill_value + self.__iosettings["fill_value"] = fill_value if compressor is not None: if isinstance(compressor, bool): # Disable compression by setting compressor to None if not compressor: - self.__iosettings['compressor'] = None + self.__iosettings["compressor"] = None # To use default settings simply do not specify any compressor settings else: pass # use the user-specified compressor else: - self.__iosettings['compressor'] = compressor + self.__iosettings["compressor"] = compressor if filters is not None: - self.__iosettings['filters'] = filters + self.__iosettings["filters"] = filters @property def link_data(self) -> bool: @@ -487,16 +509,17 @@ def from_h5py_dataset(h5dataset, **kwargs): :returns: ZarrDataIO object wrapping the dataset """ filters = ZarrDataIO.hdf5_to_zarr_filters(h5dataset) - fillval = h5dataset.fillvalue if 'fillvalue' not in kwargs else kwargs.pop('fillvalue') - if isinstance(fillval, bytes): # bytes are not JSON serializable so use string instead + fillval = h5dataset.fillvalue if "fillvalue" not in kwargs else kwargs.pop("fillvalue") + if isinstance(fillval, bytes): # bytes are not JSON serializable so use string instead fillval = fillval.decode("utf-8") - chunks = h5dataset.chunks if 'chunks' not in kwargs else kwargs.pop('chunks') + chunks = h5dataset.chunks if "chunks" not in kwargs else kwargs.pop("chunks") re = ZarrDataIO( data=h5dataset, filters=filters, fillvalue=fillval, chunks=chunks, - **kwargs) + **kwargs, + ) return re @staticmethod @@ -507,7 +530,7 @@ def hdf5_to_zarr_filters(h5dataset) -> list: # Check for unsupported filters if h5dataset.scaleoffset: # TODO: translate to numcodecs.fixedscaleoffset.FixedScaleOffset() - warn( f"{h5dataset.name} HDF5 scaleoffset filter ignored in Zarr") + warn(f"{h5dataset.name} HDF5 scaleoffset filter ignored in Zarr") if h5dataset.compression in ("szip", "lzf"): warn(f"{h5dataset.name} HDF5 szip or lzf compression ignored in Zarr") # Add the shuffle filter if possible @@ -524,7 +547,8 @@ def hdf5_to_zarr_filters(h5dataset) -> list: blocksize=total_bytes, clevel=clevel, shuffle=shuffle, - cname=blosc_compressors[compressor]) + cname=blosc_compressors[compressor], + ) filters.append(numcodecs.Blosc(**pars)) elif filter_id_str == "32015": filters.append(numcodecs.Zstd(level=properties[0])) @@ -534,7 +558,7 @@ def hdf5_to_zarr_filters(h5dataset) -> list: warn(f"{h5dataset.name} HDF5 lz4 compression ignored in Zarr") elif filter_id_str == "32008": warn(f"{h5dataset.name} HDF5 bitshuffle compression ignored in Zarr") - elif filter_id_str == "shuffle": # already handled above + elif filter_id_str == "shuffle": # already handled above pass else: warn(f"{h5dataset.name} HDF5 filter id {filter_id} with properties {properties} ignored in Zarr.") @@ -543,34 +567,45 @@ def hdf5_to_zarr_filters(h5dataset) -> list: @staticmethod def is_h5py_dataset(obj): """Check if the object is an instance of h5py.Dataset without requiring import of h5py""" - return (obj.__class__.__module__, obj.__class__.__name__) == ('h5py._hl.dataset', 'Dataset') + return (obj.__class__.__module__, obj.__class__.__name__) == ("h5py._hl.dataset", "Dataset") + class 
ZarrReference(dict): """ Data structure to describe a reference to another container used with the ZarrIO backend """ - @docval({'name': 'source', - 'type': str, - 'doc': 'Source of referenced object. Usually the relative path to the ' - 'Zarr file containing the referenced object', - 'default': None}, - {'name': 'path', - 'type': str, - 'doc': 'Path of referenced object within the source', - 'default': None}, - {'name': 'object_id', - 'type': str, - 'doc': 'Object_id of the referenced object (if available)', - 'default': None}, - {'name': 'source_object_id', - 'type': str, - 'doc': 'Object_id of the source (should always be available)', - 'default': None} - ) + @docval( + { + "name": "source", + "type": str, + "doc": "Source of referenced object. Usually the relative path to the " + "Zarr file containing the referenced object", + "default": None, + }, + { + "name": "path", + "type": str, + "doc": "Path of referenced object within the source", + "default": None, + }, + { + "name": "object_id", + "type": str, + "doc": "Object_id of the referenced object (if available)", + "default": None, + }, + { + "name": "source_object_id", + "type": str, + "doc": "Object_id of the source (should always be available)", + "default": None, + }, + ) def __init__(self, **kwargs): dest_source, dest_path, dest_object_id, dest_source_object_id = getargs( - 'source', 'path', 'object_id', 'source_object_id', kwargs) + "source", "path", "object_id", "source_object_id", kwargs + ) super(ZarrReference, self).__init__() self.source = dest_source self.path = dest_path @@ -579,32 +614,32 @@ def __init__(self, **kwargs): @property def source(self) -> str: - return super(ZarrReference, self).__getitem__('source') + return super().__getitem__("source") @property def path(self) -> str: - return super(ZarrReference, self).__getitem__('path') + return super().__getitem__("path") @property def object_id(self) -> str: - return super(ZarrReference, self).__getitem__('object_id') + return super().__getitem__("object_id") @property def source_object_id(self) -> str: - return super(ZarrReference, self).__getitem__('source_object_id') + return super().__getitem__("source_object_id") @source.setter def source(self, source: str): - super(ZarrReference, self).__setitem__('source', source) + super().__setitem__("source", source) @path.setter def path(self, path: str): - super(ZarrReference, self).__setitem__('path', path) + super().__setitem__("path", path) @object_id.setter def object_id(self, object_id: str): - super(ZarrReference, self).__setitem__('object_id', object_id) + super().__setitem__("object_id", object_id) @source_object_id.setter def source_object_id(self, object_id: str): - super(ZarrReference, self).__setitem__('source_object_id', object_id) + super().__setitem__("source_object_id", object_id) diff --git a/src/hdmf_zarr/zarr_utils.py b/src/hdmf_zarr/zarr_utils.py index c01623d0..e7790c76 100644 --- a/src/hdmf_zarr/zarr_utils.py +++ b/src/hdmf_zarr/zarr_utils.py @@ -3,6 +3,7 @@ e.g., for wrapping Zarr arrays on read, wrapping arrays for configuring write, or writing the spec among others """ + from abc import ABCMeta, abstractmethod from copy import copy import numpy as np @@ -20,10 +21,12 @@ class ZarrDataset(HDMFDataset): Extension of HDMFDataset to add Zarr compatibility """ - @docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'}, - {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}) + @docval( + {"name": "dataset", "type": 
(np.ndarray, Array), "doc": "the Zarr file lazily evaluate"}, + {"name": "io", "type": "ZarrIO", "doc": "the IO object that was used to read the underlying dataset"}, + ) def __init__(self, **kwargs): - self.__io = popargs('io', kwargs) + self.__io = popargs("io", kwargs) super().__init__(**kwargs) @property @@ -53,12 +56,12 @@ def invert(self): Return an object that defers reference resolution but in the opposite direction. """ - if not hasattr(self, '__inverted'): + if not hasattr(self, "__inverted"): cls = self.get_inverse_class() docval = get_docval(cls.__init__) kwargs = dict() for arg in docval: - kwargs[arg['name']] = getattr(self, arg['name']) + kwargs[arg["name"]] = getattr(self, arg["name"]) self.__inverted = cls(**kwargs) return self.__inverted @@ -129,12 +132,13 @@ class AbstractZarrTableDataset(DatasetOfReferences): references in compound datasets to either Builders and Containers. """ - @docval({'name': 'dataset', 'type': (np.ndarray, Array), 'doc': 'the Zarr file lazily evaluate'}, - {'name': 'io', 'type': 'ZarrIO', 'doc': 'the IO object that was used to read the underlying dataset'}, - {'name': 'types', 'type': (list, tuple), - 'doc': 'the list/tuple of reference types'}) + @docval( + {"name": "dataset", "type": (np.ndarray, Array), "doc": "the Zarr file lazily evaluate"}, + {"name": "io", "type": "ZarrIO", "doc": "the IO object that was used to read the underlying dataset"}, + {"name": "types", "type": (list, tuple), "doc": "the list/tuple of reference types"}, + ) def __init__(self, **kwargs): - types = popargs('types', kwargs) + types = popargs("types", kwargs) super().__init__(**kwargs) self.__refgetters = dict() for i, t in enumerate(types): @@ -149,15 +153,15 @@ def __init__(self, **kwargs): tmp = list() for i in range(len(self.dataset.dtype)): sub = self.dataset.dtype[i] - if np.issubdtype(sub, np.dtype('O')): - tmp.append('object') + if np.issubdtype(sub, np.dtype("O")): + tmp.append("object") if sub.metadata: - if 'vlen' in sub.metadata: - t = sub.metadata['vlen'] + if "vlen" in sub.metadata: + t = sub.metadata["vlen"] if t is str: - tmp.append('utf') + tmp.append("utf") elif t is bytes: - tmp.append('ascii') + tmp.append("ascii") else: tmp.append(sub.type.__name__) self.__dtype = tmp @@ -188,14 +192,14 @@ def _get_utf(self, string): """ Decode a dataset element to unicode """ - return string.decode('utf-8') if isinstance(string, bytes) else string + return string.decode("utf-8") if isinstance(string, bytes) else string def __get_regref(self, ref): obj = self._get_ref(ref) return obj[ref] def resolve(self, manager): - return self[0:len(self)] + return self[0 : len(self)] def __iter__(self): for i in range(len(self)): @@ -217,7 +221,7 @@ def __getitem__(self, arg): @property def dtype(self): - return 'object' + return "object" class ContainerZarrTableDataset(ContainerResolverMixin, AbstractZarrTableDataset): From 40057575ea13a549fc82f9e8119adaaae92fcb43 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 17 Dec 2024 19:30:13 -0800 Subject: [PATCH 20/23] Add ruff exception --- src/hdmf_zarr/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hdmf_zarr/__init__.py b/src/hdmf_zarr/__init__.py index 3866afbb..805efb0e 100644 --- a/src/hdmf_zarr/__init__.py +++ b/src/hdmf_zarr/__init__.py @@ -29,9 +29,9 @@ number={}, pages={165-179}, doi={10.1109/BigData47090.2019.9005648}} -""" +""" # noqa: E501 ), - description="HDMF: Hierarchical Data Modeling Framework for Modern Science Data Standards", # noqa: E501 + description="HDMF: Hierarchical Data 
Modeling Framework for Modern Science Data Standards", path="hdmf/", version=__version__, cite_module=True, From 38113eb8bc896aaa1d97d919303a20469c3d2886 Mon Sep 17 00:00:00 2001 From: rly Date: Wed, 18 Dec 2024 10:12:43 -0800 Subject: [PATCH 21/23] Fix spelling --- src/hdmf_zarr/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 3b8de130..5477e533 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -1328,7 +1328,7 @@ def __resolve_dtype_helper__(cls, dtype): elif isinstance(dtype, list): return np.dtype([(x["name"], cls.__resolve_dtype_helper__(x["dtype"])) for x in dtype]) else: - raise ValueError(f"Cant resolve dtype {dtype}") + raise ValueError(f"Can't resolve dtype {dtype}") @classmethod def get_type(cls, data): From 95d08a3c33aa1bfb115025f954999650d7887e36 Mon Sep 17 00:00:00 2001 From: rly Date: Wed, 18 Dec 2024 11:47:32 -0800 Subject: [PATCH 22/23] Run black on tests --- test_gallery.py | 81 +- tests/unit/base_tests_zarrio.py | 1117 ++++++++++++++------------- tests/unit/test_fsspec_streaming.py | 5 +- tests/unit/test_io_convert.py | 516 ++++++------- tests/unit/test_nwbzarrio.py | 10 +- tests/unit/test_parallel_write.py | 62 +- tests/unit/test_zarrdataio.py | 119 +-- tests/unit/test_zarrio.py | 83 +- tests/unit/utils.py | 441 ++++++----- 9 files changed, 1266 insertions(+), 1168 deletions(-) diff --git a/test_gallery.py b/test_gallery.py index c03fa19b..7a66d006 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -23,21 +23,13 @@ def _import_from_file(script): spec.loader.exec_module(module) -_pkg_resources_warning_re = ( - "pkg_resources is deprecated as an API" -) +_pkg_resources_warning_re = "pkg_resources is deprecated as an API" -_pkg_resources_declare_warning_re = ( - r"Deprecated call to `pkg_resources\.declare_namespace.*" -) +_pkg_resources_declare_warning_re = r"Deprecated call to `pkg_resources\.declare_namespace.*" -_numpy_warning_re = ( - "numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192" -) +_numpy_warning_re = "numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192" -_distutils_warning_re = ( - "distutils Version classes are deprecated. Use packaging.version instead." -) +_distutils_warning_re = "distutils Version classes are deprecated. Use packaging.version instead." _experimental_warning_re = ( "The ZarrIO backend is experimental. It is under active development. " @@ -50,8 +42,8 @@ def _import_from_file(script): ) _deprecation_warning_map = ( - 'Classes in map.py should be imported from hdmf.build. Importing from hdmf.build.map will be removed ' - 'in HDMF 3.0.' + "Classes in map.py should be imported from hdmf.build. Importing from hdmf.build.map will be removed " + "in HDMF 3.0." ) _deprecation_warning_fmt_docval_args = ( @@ -69,22 +61,17 @@ def _import_from_file(script): "is set), then you will need to pop the extra arguments out of kwargs before calling the function." 
) -_deprecation_warning_pandas_pyarrow_re = ( - r"\nPyarrow will become a required dependency of pandas.*" -) +_deprecation_warning_pandas_pyarrow_re = r"\nPyarrow will become a required dependency of pandas.*" -_deprecation_warning_datetime = ( - r"datetime.datetime.utcfromtimestamp() *" -) +_deprecation_warning_datetime = r"datetime.datetime.utcfromtimestamp() *" -_deprecation_warning_zarr_store = ( - r"The NestedDirectoryStore is deprecated *" -) +_deprecation_warning_zarr_store = r"The NestedDirectoryStore is deprecated *" _deprecation_warning_numpy = ( "__array__ implementation doesn't accept a copy keyword, so passing copy=False failed. " "__array__ must implement 'dtype' and 'copy' keyword arguments." ) + def run_gallery_tests(): global TOTAL, FAILURES, ERRORS logging.info("Testing execution of Sphinx Gallery files") @@ -92,7 +79,7 @@ def run_gallery_tests(): # get all python file names in docs/gallery gallery_file_names = list() for root, _, files in os.walk( - os.path.join(os.path.dirname(__file__), "docs", "gallery") + os.path.join(os.path.dirname(__file__), "docs", "gallery"), ): for f in files: if f.endswith(".py"): @@ -105,57 +92,59 @@ def run_gallery_tests(): for script in gallery_file_names: logging.info("Executing %s" % script) os.chdir(curr_dir) # Reset the working directory - script_abs = os.path.abspath(script) # Determine the full path of the script + script_abs = os.path.abspath(script) # Determine the full path of the script # Set the working dir to be relative to the script to allow the use of relative file paths in the scripts os.chdir(os.path.dirname(script_abs)) try: with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", message=_deprecation_warning_map, category=DeprecationWarning - ) + warnings.filterwarnings("ignore", message=_deprecation_warning_map, category=DeprecationWarning) warnings.filterwarnings( "ignore", message=_deprecation_warning_fmt_docval_args, category=PendingDeprecationWarning ) warnings.filterwarnings( "ignore", message=_deprecation_warning_call_docval_func, category=PendingDeprecationWarning ) - warnings.filterwarnings( - "ignore", message=_experimental_warning_re, category=UserWarning - ) - warnings.filterwarnings( - "ignore", message=_user_warning_transpose, category=UserWarning - ) + warnings.filterwarnings("ignore", message=_experimental_warning_re, category=UserWarning) + warnings.filterwarnings("ignore", message=_user_warning_transpose, category=UserWarning) warnings.filterwarnings( # this warning is triggered from pandas when HDMF is installed with the minimum requirements - "ignore", message=_distutils_warning_re, category=DeprecationWarning + "ignore", + message=_distutils_warning_re, + category=DeprecationWarning, ) warnings.filterwarnings( # this warning is triggered when some numpy extension code in an upstream package was compiled # against a different version of numpy than the one installed - "ignore", message=_numpy_warning_re, category=RuntimeWarning + "ignore", + message=_numpy_warning_re, + category=RuntimeWarning, ) warnings.filterwarnings( # this warning is triggered when downstream code such as pynwb uses pkg_resources>=5.13 - "ignore", message=_pkg_resources_warning_re, category=DeprecationWarning + "ignore", + message=_pkg_resources_warning_re, + category=DeprecationWarning, ) warnings.filterwarnings( # this warning is triggered when downstream code such as pynwb uses pkg_resources>=5.13 - "ignore", message=_pkg_resources_declare_warning_re, category=DeprecationWarning + "ignore", + 
message=_pkg_resources_declare_warning_re, + category=DeprecationWarning, ) warnings.filterwarnings( # this warning is triggered from pandas - "ignore", message=_deprecation_warning_pandas_pyarrow_re, category=DeprecationWarning + "ignore", + message=_deprecation_warning_pandas_pyarrow_re, + category=DeprecationWarning, ) warnings.filterwarnings( # this is triggered from datetime - "ignore", message=_deprecation_warning_datetime, category=DeprecationWarning - ) - warnings.filterwarnings( - "ignore", message=_deprecation_warning_zarr_store, category=FutureWarning - ) - warnings.filterwarnings( - "ignore", message=_deprecation_warning_numpy, category=DeprecationWarning + "ignore", + message=_deprecation_warning_datetime, + category=DeprecationWarning, ) + warnings.filterwarnings("ignore", message=_deprecation_warning_zarr_store, category=FutureWarning) + warnings.filterwarnings("ignore", message=_deprecation_warning_numpy, category=DeprecationWarning) _import_from_file(script_abs) except Exception: print(traceback.format_exc()) diff --git a/tests/unit/base_tests_zarrio.py b/tests/unit/base_tests_zarrio.py index ddfe9dc5..c30cf482 100644 --- a/tests/unit/base_tests_zarrio.py +++ b/tests/unit/base_tests_zarrio.py @@ -3,6 +3,7 @@ The actual tests are then instantiated with various different backends in the test_zarrio.py module.""" + import unittest import os import numpy as np @@ -13,31 +14,23 @@ import zarr from hdmf_zarr.backend import ZarrIO from hdmf_zarr.utils import ZarrDataIO, ZarrReference -from tests.unit.utils import (Baz, BazData, BazBucket, get_baz_buildmanager) +from tests.unit.utils import Baz, BazData, BazBucket, get_baz_buildmanager # Try to import numcodecs and disable compression tests if it is not available try: from numcodecs import Blosc, Delta, JSON + DISABLE_ZARR_COMPRESSION_TESTS = False except ImportError: DISABLE_ZARR_COMPRESSION_TESTS = True from hdmf.spec.namespace import NamespaceCatalog -from hdmf.build import (GroupBuilder, - DatasetBuilder, - LinkBuilder, - ReferenceBuilder, - OrphanContainerBuildError) +from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, OrphanContainerBuildError from hdmf.data_utils import DataChunkIterator from hdmf.testing import TestCase -from hdmf.backends.io import (HDMFIO, - UnsupportedOperation) +from hdmf.backends.io import HDMFIO, UnsupportedOperation -from tests.unit.utils import (Foo, - FooBucket, - FooFile, - get_foo_buildmanager, - CacheSpecTestHelper) +from tests.unit.utils import Foo, FooBucket, FooFile, get_foo_buildmanager, CacheSpecTestHelper from abc import ABCMeta, abstractmethod @@ -75,7 +68,7 @@ def tearDown(self): """ Remove all files and folders defined by self.store_path """ - paths = self.store_path if isinstance(self.store_path, list) else [self.store_path, ] + paths = self.store_path if isinstance(self.store_path, list) else [self.store_path] for path in paths: if os.path.exists(path): if os.path.isdir(path): @@ -91,6 +84,7 @@ class ZarrStoreTestCase(TestCase): Class that creates a zarr file containing groups, datasets, and references for general purpose testing. 
""" + def setUp(self): self.store_path = "test_io.zarr" @@ -104,22 +98,22 @@ def tearDown(self): def createReferenceBuilder(self): data_1 = np.arange(100, 200, 10).reshape(2, 5) data_2 = np.arange(0, 200, 10).reshape(4, 5) - dataset_1 = DatasetBuilder('dataset_1', data_1) - dataset_2 = DatasetBuilder('dataset_2', data_2) + dataset_1 = DatasetBuilder("dataset_1", data_1) + dataset_2 = DatasetBuilder("dataset_2", data_2) ref_dataset_1 = ReferenceBuilder(dataset_1) ref_dataset_2 = ReferenceBuilder(dataset_2) ref_data = [ref_dataset_1, ref_dataset_2] - dataset_ref = DatasetBuilder('ref_dataset', ref_data, dtype='object') + dataset_ref = DatasetBuilder("ref_dataset", ref_data, dtype="object") - builder = GroupBuilder('root', - source=self.store_path, - datasets={'dataset_1': dataset_1, - 'dataset_2': dataset_2, - 'ref_dataset': dataset_ref}) + builder = GroupBuilder( + name="root", + source=self.store_path, + datasets={"dataset_1": dataset_1, "dataset_2": dataset_2, "ref_dataset": dataset_ref}, + ) return builder - def create_zarr(self, consolidate_metadata=True, force_overwrite=False, mode='a'): + def create_zarr(self, consolidate_metadata=True, force_overwrite=False, mode="a"): builder = self.createReferenceBuilder() writer = ZarrIO(self.store_path, mode=mode, force_overwrite=force_overwrite) writer.write_builder(builder, consolidate_metadata) @@ -151,85 +145,91 @@ def setUp(self): self.store_path = "test_io.zarr" def createGroupBuilder(self): - self.foo_builder = GroupBuilder('foo1', - attributes={'data_type': 'Foo', - 'namespace': 'test_core', - 'attr1': 17.5}, - datasets={'my_data': self.__dataset_builder}) + self.foo_builder = GroupBuilder( + "foo1", + attributes={"data_type": "Foo", "namespace": "test_core", "attr1": 17.5}, + datasets={"my_data": self.__dataset_builder}, + ) # self.foo = Foo('foo1', self.__dataset_builder.data, attr1="bar", attr2=17, attr3=3.14) # self.manager.prebuilt(self.foo, self.foo_builder) self.builder = GroupBuilder( - 'root', + name="root", source=self.store_path, - groups={'test_bucket': - GroupBuilder('test_bucket', - groups={'foo_holder': - GroupBuilder('foo_holder', - groups={'foo1': self.foo_builder})})}, - attributes={'data_type': 'FooFile'}) + groups={ + "test_bucket": GroupBuilder( + name="test_bucket", + groups={ + "foo_holder": GroupBuilder(name="foo_holder", groups={"foo1": self.foo_builder}), + }, + ) + }, + attributes={"data_type": "FooFile"}, + ) def createReferenceBuilder(self): data_1 = np.arange(100, 200, 10).reshape(2, 5) data_2 = np.arange(0, 200, 10).reshape(4, 5) - dataset_1 = DatasetBuilder('dataset_1', data_1) - dataset_2 = DatasetBuilder('dataset_2', data_2) + dataset_1 = DatasetBuilder("dataset_1", data_1) + dataset_2 = DatasetBuilder("dataset_2", data_2) ref_dataset_1 = ReferenceBuilder(dataset_1) ref_dataset_2 = ReferenceBuilder(dataset_2) ref_data = [ref_dataset_1, ref_dataset_2] - dataset_ref = DatasetBuilder('ref_dataset', ref_data, dtype='object') + dataset_ref = DatasetBuilder("ref_dataset", ref_data, dtype="object") - builder = GroupBuilder('root', - source=self.store_path, - datasets={'dataset_1': dataset_1, - 'dataset_2': dataset_2, - 'ref_dataset': dataset_ref}) + builder = GroupBuilder( + name="root", + source=self.store_path, + datasets={"dataset_1": dataset_1, "dataset_2": dataset_2, "ref_dataset": dataset_ref}, + ) return builder def createReferenceCompoundBuilder(self): data_1 = np.arange(100, 200, 10).reshape(2, 5) data_2 = np.arange(0, 200, 10).reshape(4, 5) - dataset_1 = DatasetBuilder('dataset_1', data_1) - dataset_2 
= DatasetBuilder('dataset_2', data_2) + dataset_1 = DatasetBuilder("dataset_1", data_1) + dataset_2 = DatasetBuilder("dataset_2", data_2) ref_dataset_1 = ReferenceBuilder(dataset_1) ref_dataset_2 = ReferenceBuilder(dataset_2) ref_data = [ - (1, 'dataset_1', ref_dataset_1), - (2, 'dataset_2', ref_dataset_2) + (1, "dataset_1", ref_dataset_1), + (2, "dataset_2", ref_dataset_2), + ] + ref_data_type = [ + {"name": "id", "dtype": "int"}, + {"name": "name", "dtype": str}, + {"name": "reference", "dtype": "object"}, ] - ref_data_type = [{'name': 'id', 'dtype': 'int'}, - {'name': 'name', 'dtype': str}, - {'name': 'reference', 'dtype': 'object'}] - dataset_ref = DatasetBuilder('ref_dataset', ref_data, dtype=ref_data_type) - builder = GroupBuilder('root', - source=self.store_path, - datasets={'dataset_1': dataset_1, - 'dataset_2': dataset_2, - 'ref_dataset': dataset_ref}) + dataset_ref = DatasetBuilder("ref_dataset", ref_data, dtype=ref_data_type) + builder = GroupBuilder( + name="root", + source=self.store_path, + datasets={"dataset_1": dataset_1, "dataset_2": dataset_2, "ref_dataset": dataset_ref}, + ) return builder def test_cannot_read(self): assert not ZarrIO.can_read("incorrect_path") def read_test_dataset(self): - reader = ZarrIO(self.store_path, manager=self.manager, mode='r') + reader = ZarrIO(self.store_path, manager=self.manager, mode="r") self.root = reader.read_builder() - dataset = self.root['test_bucket/foo_holder/foo1/my_data'] + dataset = self.root["test_bucket/foo_holder/foo1/my_data"] return dataset def read(self): - reader = ZarrIO(self.store_path, manager=self.manager, mode='r') + reader = ZarrIO(self.store_path, manager=self.manager, mode="r") self.root = reader.read_builder() def test_cache_spec(self): - tempIO = ZarrIO(self.store_path, manager=self.manager, mode='w') + tempIO = ZarrIO(self.store_path, manager=self.manager, mode="w") # Setup all the data we need - foo1 = Foo('foo1', [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) - foo2 = Foo('foo2', [5, 6, 7, 8, 9], "I am foo2", 34, 6.28) - foobucket = FooBucket('test_bucket', [foo1, foo2]) + foo1 = Foo("foo1", [0, 1, 2, 3, 4], "I am foo1", 17, 3.14) + foo2 = Foo("foo2", [5, 6, 7, 8, 9], "I am foo2", 34, 6.28) + foobucket = FooBucket("test_bucket", [foo1, foo2]) foofile = FooFile(buckets=[foobucket]) # Write the first file @@ -239,16 +239,16 @@ def test_cache_spec(self): # Load the spec and assert that it is valid ns_catalog = NamespaceCatalog() ZarrIO.load_namespaces(ns_catalog, self.store_path) - self.assertEqual(ns_catalog.namespaces, ('test_core',)) + self.assertEqual(ns_catalog.namespaces, ("test_core",)) source_types = CacheSpecTestHelper.get_types(self.manager.namespace_catalog) read_types = CacheSpecTestHelper.get_types(ns_catalog) self.assertSetEqual(source_types, read_types) def test_write_int(self, test_data=None): data = np.arange(100, 200, 10).reshape(2, 5) if test_data is None else test_data - self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) + self.__dataset_builder = DatasetBuilder("my_data", data, attributes={"attr2": 17}) self.createGroupBuilder() - writer = ZarrIO(self.store_path, manager=self.manager, mode='a') + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(self.builder) writer.close() assert ZarrIO.can_read(self.store_path) @@ -259,73 +259,70 @@ def test_write_compound(self, test_data=None): each tuple consists of an int and a string :return: """ - data = [(1, 'Allen'), - (2, 'Bob'), - (3, 'Mike'), - (4, 'Jenny')] if test_data is None else 
test_data - data_type = [{'name': 'id', 'dtype': 'int'}, - {'name': 'name', 'dtype': str}] - self.__dataset_builder = DatasetBuilder('my_data', data, dtype=data_type) + if test_data is None: + test_data = [(1, "Allen"), (2, "Bob"), (3, "Mike"), (4, "Jenny")] + data_type = [{"name": "id", "dtype": "int"}, {"name": "name", "dtype": str}] + self.__dataset_builder = DatasetBuilder("my_data", test_data, dtype=data_type) self.createGroupBuilder() - writer = ZarrIO(self.store_path, manager=self.manager, mode='w') + writer = ZarrIO(self.store_path, manager=self.manager, mode="w") writer.write_builder(self.builder) writer.close() def test_write_chunk(self, test_data=None): - data = np.arange(100, 200, 10).reshape(2, 5) if test_data is None else test_data - data_io = ZarrDataIO(data=data, chunks=(1, 5), fillvalue=-1) - self.__dataset_builder = DatasetBuilder('my_data', data_io, attributes={'attr2': 17}) + if test_data is None: + test_data = np.arange(100, 200, 10).reshape(2, 5) + data_io = ZarrDataIO(data=test_data, chunks=(1, 5), fillvalue=-1) + self.__dataset_builder = DatasetBuilder("my_data", data_io, attributes={"attr2": 17}) self.createGroupBuilder() - writer = ZarrIO(self.store_path, manager=self.manager, mode='a') + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(self.builder) writer.close() def test_write_strings(self, test_data=None): - data = [['a', 'aa', 'aaa', 'aaaa', 'aaaaa'], - ['b', 'bb', 'bbb', 'bbbb', 'bbbbb']] if test_data is None else test_data - self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) + if test_data is None: + test_data = [["a", "aa", "aaa", "aaaa", "aaaaa"], ["b", "bb", "bbb", "bbbb", "bbbbb"]] + self.__dataset_builder = DatasetBuilder("my_data", test_data, attributes={"attr2": 17}) self.createGroupBuilder() - writer = ZarrIO(self.store_path, manager=self.manager, mode='a') + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(self.builder) writer.close() def test_write_links(self, test_data=None): - data = np.arange(100, 200, 10).reshape(2, 5) if test_data is None else test_data - self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) + if test_data is None: + test_data = np.arange(100, 200, 10).reshape(2, 5) + self.__dataset_builder = DatasetBuilder("my_data", test_data, attributes={"attr2": 17}) self.createGroupBuilder() - link_parent = self.builder['test_bucket'] - link_parent.set_link(LinkBuilder(self.foo_builder, 'my_link')) - link_parent.set_link(LinkBuilder(self.__dataset_builder, 'my_dataset')) - writer = ZarrIO(self.store_path, manager=self.manager, mode='a') + link_parent = self.builder["test_bucket"] + link_parent.set_link(LinkBuilder(self.foo_builder, "my_link")) + link_parent.set_link(LinkBuilder(self.__dataset_builder, "my_dataset")) + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(self.builder) writer.close() def test_write_link_array(self): data = np.arange(100, 200, 10).reshape(2, 5) - self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) + self.__dataset_builder = DatasetBuilder("my_data", data, attributes={"attr2": 17}) self.createGroupBuilder() - writer = ZarrIO(self.store_path, manager=self.manager, mode='a') + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(self.builder) - zarr_file = zarr.open(self.store_path, mode='r') + zarr_file = zarr.open(self.store_path, mode="r") zarr_array = 
zarr_file["/test_bucket/foo_holder/foo1/my_data"] link_io = ZarrDataIO(data=zarr_array, link_data=True) - link_dataset = DatasetBuilder('dataset_link', link_io) - self.builder['test_bucket'].set_dataset(link_dataset) + link_dataset = DatasetBuilder("dataset_link", link_io) + self.builder["test_bucket"].set_dataset(link_dataset) writer.write_builder(self.builder) writer.close() - reader = ZarrIO(self.store_path, manager=self.manager, mode='r') + reader = ZarrIO(self.store_path, manager=self.manager, mode="r") self.root = reader.read_builder() - read_link = self.root['test_bucket/dataset_link'] - read_link_data = read_link['builder']['data'][:] + read_link = self.root["test_bucket/dataset_link"] + read_link_data = read_link["builder"]["data"][:] self.assertTrue(np.all(data == read_link_data)) reader.close() def test_write_reference(self): builder = self.createReferenceBuilder() - writer = ZarrIO(self.store_path, - manager=self.manager, - mode='a') + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(builder) writer.close() @@ -334,63 +331,62 @@ def test_write_references_roundtrip(self): num_bazs = 1 bazs = [] # set up dataset of references for i in range(num_bazs): - bazs.append(Baz(name='baz%d' % i)) - baz_data = BazData(name='baz_data', data=bazs) + bazs.append(Baz(name="baz%d" % i)) + baz_data = BazData(name="baz_data", data=bazs) container = BazBucket(bazs=bazs, baz_data=baz_data) manager = get_baz_buildmanager() # write to file - with ZarrIO(self.store_path, manager=manager, mode='w') as writer: + with ZarrIO(self.store_path, manager=manager, mode="w") as writer: writer.write(container=container) # read from file and validate references - with ZarrIO(self.store_path, manager=manager, mode='r') as reader: + with ZarrIO(self.store_path, manager=manager, mode="r") as reader: read_container = reader.read() for i in range(num_bazs): - baz_name = 'baz%d' % i + baz_name = "baz%d" % i expected_container = read_container.bazs[baz_name] - expected_value = {'source': 'test_io.zarr', - 'path': '/bazs/' + baz_name, - 'object_id': expected_container.object_id, - 'source_object_id': read_container.object_id} + expected_value = { + "source": "test_io.zarr", + "path": "/bazs/" + baz_name, + "object_id": expected_container.object_id, + "source_object_id": read_container.object_id, + } # Read the dict with the definition of the reference from the raw Zarr file and compare # to also check that reference (included object id's) are defined correctly - self.assertDictEqual(reader.file['baz_data'][i], expected_value) + self.assertDictEqual(reader.file["baz_data"][i], expected_value) # Also test using the low-level reference functions zarr_ref = ZarrReference(**expected_value) # Check the ZarrReference first - self.assertEqual(zarr_ref.object_id, expected_value['object_id']) - self.assertEqual(zarr_ref.source_object_id, expected_value['source_object_id']) + self.assertEqual(zarr_ref.object_id, expected_value["object_id"]) + self.assertEqual(zarr_ref.source_object_id, expected_value["source_object_id"]) def test_write_reference_compound(self): builder = self.createReferenceCompoundBuilder() - writer = ZarrIO(self.store_path, manager=self.manager, mode='a') + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(builder) writer.close() def test_read_int(self): test_data = np.arange(100, 200, 10).reshape(5, 2) self.test_write_int(test_data=test_data) - dataset = self.read_test_dataset()['data'][:] + dataset = self.read_test_dataset()["data"][:] 
self.assertTrue(np.all(test_data == dataset)) def test_read_chunk(self): test_data = np.arange(100, 200, 10).reshape(5, 2) self.test_write_chunk(test_data=test_data) - dataset = self.read_test_dataset()['data'][:] + dataset = self.read_test_dataset()["data"][:] self.assertTrue(np.all(test_data == dataset)) def test_read_strings(self): - test_data = [['a1', 'aa2', 'aaa3', 'aaaa4', 'aaaaa5'], - ['b1', 'bb2', 'bbb3', 'bbbb4', 'bbbbb5']] + test_data = [["a1", "aa2", "aaa3", "aaaa4", "aaaaa5"], ["b1", "bb2", "bbb3", "bbbb4", "bbbbb5"]] self.test_write_strings(test_data=test_data) - dataset = self.read_test_dataset()['data'][:] + dataset = self.read_test_dataset()["data"][:] self.assertTrue(np.all(np.asarray(test_data) == dataset)) def test_read_compound(self): - test_data = [(1, 'Allen1'), - (2, 'Bob1'), - (3, 'Mike1')] + test_data = [(1, "Allen1"), (2, "Bob1"), (3, "Mike1")] self.test_write_compound(test_data=test_data) - dataset = self.read_test_dataset()['data'] + dataset = self.read_test_dataset()["data"] self.assertTupleEqual(test_data[0], tuple(dataset[0])) self.assertTupleEqual(test_data[1], tuple(dataset[1])) self.assertTupleEqual(test_data[2], tuple(dataset[2])) @@ -399,81 +395,89 @@ def test_read_link(self): test_data = np.arange(100, 200, 10).reshape(5, 2) self.test_write_links(test_data=test_data) self.read() - link_data = self.root['test_bucket'].links['my_dataset'].builder.data[()] + link_data = self.root["test_bucket"].links["my_dataset"].builder.data[()] self.assertTrue(np.all(np.asarray(test_data) == link_data)) # print(self.root['test_bucket'].links['my_dataset'].builder.data[()]) def test_read_link_buf(self): data = np.arange(100, 200, 10).reshape(2, 5) - self.__dataset_builder = DatasetBuilder('my_data', data, attributes={'attr2': 17}) + self.__dataset_builder = DatasetBuilder("my_data", data, attributes={"attr2": 17}) self.createGroupBuilder() - link_parent_1 = self.builder['test_bucket'] - link_parent_2 = self.builder['test_bucket/foo_holder'] - link_parent_1.set_link(LinkBuilder(self.__dataset_builder, 'my_dataset_1')) - link_parent_2.set_link(LinkBuilder(self.__dataset_builder, 'my_dataset_2')) - writer = ZarrIO(self.store_path, manager=self.manager, mode='a') + link_parent_1 = self.builder["test_bucket"] + link_parent_2 = self.builder["test_bucket/foo_holder"] + link_parent_1.set_link(LinkBuilder(self.__dataset_builder, "my_dataset_1")) + link_parent_2.set_link(LinkBuilder(self.__dataset_builder, "my_dataset_2")) + writer = ZarrIO(self.store_path, manager=self.manager, mode="a") writer.write_builder(self.builder) writer.close() self.read() - self.assertTrue(self.root['test_bucket'].links['my_dataset_1'].builder == - self.root['test_bucket/foo_holder'].links['my_dataset_2'].builder) + self.assertTrue( + self.root["test_bucket"].links["my_dataset_1"].builder + == self.root["test_bucket/foo_holder"].links["my_dataset_2"].builder + ) def test_read_reference(self): self.test_write_reference() self.read() - builder = self.createReferenceBuilder()['ref_dataset'] - read_builder = self.root['ref_dataset'] + builder = self.createReferenceBuilder()["ref_dataset"] + read_builder = self.root["ref_dataset"] # Load the linked arrays and confirm we get the same data as we had in the original builder - for i, v in enumerate(read_builder['data']): - self.assertTrue(np.all(builder['data'][i]['builder']['data'] == v['data'][:])) + for i, v in enumerate(read_builder["data"]): + self.assertTrue(np.all(builder["data"][i]["builder"]["data"] == v["data"][:])) def 
test_read_reference_compound(self): self.test_write_reference_compound() self.read() - builder = self.createReferenceCompoundBuilder()['ref_dataset'] - read_builder = self.root['ref_dataset'] + builder = self.createReferenceCompoundBuilder()["ref_dataset"] + read_builder = self.root["ref_dataset"] # ensure the array was written as a compound array - ref_dtype = np.dtype([('id', ' np.ndarray: def test_parallel_write(tmpdir): number_of_jobs = 2 - data = np.array([1., 2., 3.]) + data = np.array([1.0, 2.0, 3.0]) column = VectorData(name="TestColumn", description="", data=PickleableDataChunkIterator(data=data)) dynamic_table = DynamicTable(name="TestTable", description="", id=list(range(3)), columns=[column]) zarr_top_level_path = str(tmpdir / "test_parallel_write.zarr") - with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: io.write(container=dynamic_table, number_of_jobs=number_of_jobs) with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="r") as io: @@ -96,22 +98,26 @@ def test_parallel_write(tmpdir): def test_mixed_iterator_types(tmpdir): number_of_jobs = 2 - generic_iterator_data = np.array([1., 2., 3.]) + generic_iterator_data = np.array([1.0, 2.0, 3.0]) generic_iterator_column = VectorData( name="TestGenericIteratorColumn", description="", - data=PickleableDataChunkIterator(data=generic_iterator_data) + data=PickleableDataChunkIterator(data=generic_iterator_data), ) - classic_iterator_data = np.array([4., 5., 6.]) + classic_iterator_data = np.array([4.0, 5.0, 6.0]) classic_iterator_column = VectorData( name="TestClassicIteratorColumn", description="", - data=DataChunkIterator(data=classic_iterator_data) + data=DataChunkIterator(data=classic_iterator_data), ) - unwrappped_data = np.array([7., 8., 9.]) - unwrapped_column = VectorData(name="TestUnwrappedColumn", description="", data=unwrappped_data) + unwrappped_data = np.array([7.0, 8.0, 9.0]) + unwrapped_column = VectorData( + name="TestUnwrappedColumn", + description="", + data=unwrappped_data, + ) dynamic_table = DynamicTable( name="TestTable", description="", @@ -120,7 +126,7 @@ def test_mixed_iterator_types(tmpdir): ) zarr_top_level_path = str(tmpdir / "test_mixed_iterator_types.zarr") - with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: io.write(container=dynamic_table, number_of_jobs=number_of_jobs) with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="r") as io: @@ -138,18 +144,18 @@ def test_mixed_iterator_types(tmpdir): def test_mixed_iterator_pickleability(tmpdir): number_of_jobs = 2 - pickleable_iterator_data = np.array([1., 2., 3.]) + pickleable_iterator_data = np.array([1.0, 2.0, 3.0]) pickleable_iterator_column = VectorData( name="TestGenericIteratorColumn", description="", - data=PickleableDataChunkIterator(data=pickleable_iterator_data) + data=PickleableDataChunkIterator(data=pickleable_iterator_data), ) - not_pickleable_iterator_data = np.array([4., 5., 6.]) + not_pickleable_iterator_data = np.array([4.0, 5.0, 6.0]) not_pickleable_iterator_column = VectorData( name="TestClassicIteratorColumn", description="", - data=NotPickleableDataChunkIterator(data=not_pickleable_iterator_data) + data=NotPickleableDataChunkIterator(data=not_pickleable_iterator_data), ) dynamic_table = DynamicTable( @@ -160,7 +166,7 @@ def test_mixed_iterator_pickleability(tmpdir): ) zarr_top_level_path = str(tmpdir / 
"test_mixed_iterator_pickleability.zarr") - with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: io.write(container=dynamic_table, number_of_jobs=number_of_jobs) with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="r") as io: @@ -180,20 +186,20 @@ def test_simple_tqdm(tmpdir): zarr_top_level_path = str(tmpdir / "test_simple_tqdm.zarr") with patch("sys.stderr", new=StringIO()) as tqdm_out: - with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: column = VectorData( name="TestColumn", description="", data=PickleableDataChunkIterator( - data=np.array([1., 2., 3.]), + data=np.array([1.0, 2.0, 3.0]), display_progress=True, - ) + ), ) dynamic_table = DynamicTable( name="TestTable", description="", columns=[column], - id=list(range(3)) # must provide id's when all columns are iterators + id=list(range(3)), # must provide id's when all columns are iterators ) io.write(container=dynamic_table, number_of_jobs=number_of_jobs) @@ -208,29 +214,29 @@ def test_compound_tqdm(tmpdir): zarr_top_level_path = str(tmpdir / "test_compound_tqdm.zarr") with patch("sys.stderr", new=StringIO()) as tqdm_out: - with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: pickleable_column = VectorData( name="TestPickleableIteratorColumn", description="", data=PickleableDataChunkIterator( - data=np.array([1., 2., 3.]), + data=np.array([1.0, 2.0, 3.0]), display_progress=True, - ) + ), ) not_pickleable_column = VectorData( name="TestNotPickleableColumn", description="", data=NotPickleableDataChunkIterator( - data=np.array([4., 5., 6.]), + data=np.array([4.0, 5.0, 6.0]), display_progress=True, - progress_bar_options=dict(desc=expected_desc_not_pickleable, position=1) - ) + progress_bar_options=dict(desc=expected_desc_not_pickleable, position=1), + ), ) dynamic_table = DynamicTable( name="TestTable", description="", columns=[pickleable_column, not_pickleable_column], - id=list(range(3)) # must provide id's when all columns are iterators + id=list(range(3)), # must provide id's when all columns are iterators ) io.write(container=dynamic_table, number_of_jobs=number_of_jobs) @@ -242,7 +248,7 @@ def test_compound_tqdm(tmpdir): def test_extra_keyword_argument_propagation(tmpdir): number_of_jobs = 2 - column = VectorData(name="TestColumn", description="", data=np.array([1., 2., 3.])) + column = VectorData(name="TestColumn", description="", data=np.array([1.0, 2.0, 3.0])) dynamic_table = DynamicTable(name="TestTable", description="", id=list(range(3)), columns=[column]) zarr_top_level_path = str(tmpdir / "test_extra_parallel_write_keyword_arguments.zarr") @@ -263,12 +269,12 @@ def test_extra_keyword_argument_propagation(tmpdir): for test_keyword_argument_pair in test_keyword_argument_pairs: test_max_threads_per_process = test_keyword_argument_pair["max_threads_per_process"] test_multiprocessing_context = test_keyword_argument_pair["multiprocessing_context"] - with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: + with ZarrIO(path=zarr_top_level_path, manager=get_manager(), mode="w") as io: io.write( container=dynamic_table, number_of_jobs=number_of_jobs, max_threads_per_process=test_max_threads_per_process, - multiprocessing_context=test_multiprocessing_context + 
multiprocessing_context=test_multiprocessing_context, ) assert io._ZarrIO__dci_queue.max_threads_per_process == test_max_threads_per_process diff --git a/tests/unit/test_zarrdataio.py b/tests/unit/test_zarrdataio.py index f52f2f5c..963308e3 100644 --- a/tests/unit/test_zarrdataio.py +++ b/tests/unit/test_zarrdataio.py @@ -6,6 +6,7 @@ more complex operations and are more akin to integration tests This module focuses on test for specific unit functions of ZarrDataIO. """ + import numcodecs import h5py import os @@ -16,6 +17,7 @@ try: import hdf5plugin + HDF5PLUGIN = True except ImportError: HDF5PLUGIN = False @@ -23,8 +25,10 @@ from hdmf_zarr.utils import ZarrDataIO from tests.unit.utils import get_temp_filepath + class TestZarrDataIO(TestCase): """Test the ZarrDataIO class""" + def setUp(self): self.hdf_filename = get_temp_filepath() self.zarr_filename = get_temp_filepath() @@ -41,8 +45,8 @@ def tearDown(self): def test_hdf5_to_zarr_filters_scaleoffset(self): """Test that we warn when the scaleoffset filter is being used in HDF5 in ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') - h5dset = h5file.create_dataset(name='test_dset', data=[1,2,3,4,5], scaleoffset=10) + h5file = h5py.File(self.hdf_filename, mode="a") + h5dset = h5file.create_dataset(name="test_dset", data=[1, 2, 3, 4, 5], scaleoffset=10) # test that we warn due to the scaleoffset msg = "/test_dset HDF5 scaleoffset filter ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): @@ -54,8 +58,8 @@ def test_hdf5_to_zarr_filters_scaleoffset(self): def test_hdf5_to_zarr_filters_lzf(self): """Test that we warn when the lzf filter is being used in HDF5 in ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') - h5dset = h5file.create_dataset(name='test_dset', data=[1, 2, 3, 4, 5], compression="lzf") + h5file = h5py.File(self.hdf_filename, mode="a") + h5dset = h5file.create_dataset(name="test_dset", data=[1, 2, 3, 4, 5], compression="lzf") # test that we warn due to the scaleoffset msg = "/test_dset HDF5 szip or lzf compression ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): @@ -68,10 +72,12 @@ def test_hdf5_to_zarr_filters_lzf(self): def test_hdf5_to_zarr_filters_lz4(self): """Test that we warn when the lz4 filter is being used in HDF5 in ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') - h5dset = h5file.create_dataset(name='test_dset', - data=[1, 2, 3, 4, 5], - **hdf5plugin.LZ4()) + h5file = h5py.File(self.hdf_filename, mode="a") + h5dset = h5file.create_dataset( + name="test_dset", + data=[1, 2, 3, 4, 5], + **hdf5plugin.LZ4(), + ) # test that we warn due to the scaleoffset msg = "/test_dset HDF5 lz4 compression ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): @@ -84,10 +90,12 @@ def test_hdf5_to_zarr_filters_lz4(self): def test_hdf5_to_zarr_filters_bitshuffle(self): """Test that we warn when the bitshuffle filter is being used in HDF5 in ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') - h5dset = h5file.create_dataset(name='test_dset', - data=[1, 2, 3, 4, 5], - **hdf5plugin.Bitshuffle(nelems=0, lz4=True)) + h5file = h5py.File(self.hdf_filename, mode="a") + h5dset = h5file.create_dataset( + name="test_dset", + data=[1, 2, 3, 4, 5], + **hdf5plugin.Bitshuffle(nelems=0, lz4=True), + ) # 
test that we warn due to the scaleoffset msg = "/test_dset HDF5 bitshuffle compression ignored in Zarr" with self.assertWarnsWith(UserWarning, msg): @@ -103,11 +111,12 @@ def test_hdf5_to_zarr_filters_other_unsupported(self): This test is to ensure that the catch-all at the end of the loop works. """ # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') + h5file = h5py.File(self.hdf_filename, mode="a") h5dset_FciDecomp = h5file.create_dataset( - name='test_fcidecomp', + name="test_fcidecomp", data=[1, 2, 3, 4, 5], - **hdf5plugin.FciDecomp()) + **hdf5plugin.FciDecomp(), + ) # test that we warn due to the FciDecomp msg = r"/test_fcidecomp HDF5 filter id 32018 with properties .* ignored in Zarr." with self.assertWarnsRegex(UserWarning, msg): @@ -119,9 +128,17 @@ def test_hdf5_to_zarr_filters_other_unsupported(self): def test_hdf5_to_zarr_filters_shuffle(self): """Test HDF5 shuffle filter works with ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') - h5dset_int = h5file.create_dataset(name='test_int', data=np.arange(5, dtype='int32'), shuffle=True) - h5dset_float = h5file.create_dataset(name='test_float', data=np.arange(5, dtype='float32'), shuffle=True) + h5file = h5py.File(self.hdf_filename, mode="a") + h5dset_int = h5file.create_dataset( + name="test_int", + data=np.arange(5, dtype="int32"), + shuffle=True, + ) + h5dset_float = h5file.create_dataset( + name="test_float", + data=np.arange(5, dtype="float32"), + shuffle=True, + ) # test that we apply shuffle filter on int data filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset_int) self.assertEqual(len(filters), 1) @@ -136,17 +153,17 @@ def test_hdf5_to_zarr_filters_shuffle(self): def test_hdf5_to_zarr_filters_blosclz(self): """Test HDF5 blosclz filter works with ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') + h5file = h5py.File(self.hdf_filename, mode="a") h5dset = h5file.create_dataset( - name='test_int', - data=np.arange(100, dtype='float32'), - **hdf5plugin.Blosc(cname='blosclz', clevel=9, shuffle=hdf5plugin.Blosc.SHUFFLE) + name="test_int", + data=np.arange(100, dtype="float32"), + **hdf5plugin.Blosc(cname="blosclz", clevel=9, shuffle=hdf5plugin.Blosc.SHUFFLE), ) # test that we apply shuffle filter on int data filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset) self.assertEqual(len(filters), 1) self.assertIsInstance(filters[0], numcodecs.Blosc) - self.assertEqual(filters[0].cname, 'blosclz') + self.assertEqual(filters[0].cname, "blosclz") self.assertEqual(filters[0].clevel, 9) self.assertEqual(filters[0].shuffle, hdf5plugin.Blosc.SHUFFLE) h5file.close() @@ -155,11 +172,11 @@ def test_hdf5_to_zarr_filters_blosclz(self): def test_hdf5_to_zarr_filters_zstd(self): """Test HDF5 zstd filter works with ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') + h5file = h5py.File(self.hdf_filename, mode="a") h5dset = h5file.create_dataset( - name='test_int', - data=np.arange(100, dtype='float32'), - **hdf5plugin.Zstd(clevel=22) + name="test_int", + data=np.arange(100, dtype="float32"), + **hdf5plugin.Zstd(clevel=22), ) # test that we apply shuffle filter on int data filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset) @@ -172,12 +189,12 @@ def test_hdf5_to_zarr_filters_zstd(self): def test_hdf5_to_zarr_filters_gzip(self): """Test HDF5 gzip filter works with 
ZarrDataIO.hdf5_to_zarr_filters.""" # Create a test HDF5 dataset with scaleoffset - h5file = h5py.File(self.hdf_filename, mode='a') + h5file = h5py.File(self.hdf_filename, mode="a") h5dset = h5file.create_dataset( - name='test_int', - data=np.arange(100, dtype='float32'), - compression='gzip', - compression_opts=2 + name="test_int", + data=np.arange(100, dtype="float32"), + compression="gzip", + compression_opts=2, ) # test that we apply shuffle filter on int data filters = ZarrDataIO.hdf5_to_zarr_filters(h5dset) @@ -189,32 +206,33 @@ def test_hdf5_to_zarr_filters_gzip(self): def test_is_h5py_dataset(self): """Test ZarrDataIO.is_h5py_dataset""" - h5file = h5py.File(self.hdf_filename, mode='a') - arr=np.arange(10) - h5dset = h5file.create_dataset(name='test', data=arr) + h5file = h5py.File(self.hdf_filename, mode="a") + arr = np.arange(10) + h5dset = h5file.create_dataset(name="test", data=arr) self.assertTrue(ZarrDataIO.is_h5py_dataset(h5dset)) self.assertFalse(ZarrDataIO.is_h5py_dataset(arr)) def test_from_h5py_dataset(self): """Test ZarrDataIO.from_h5py_dataset""" - h5file = h5py.File(self.hdf_filename, mode='a') + h5file = h5py.File(self.hdf_filename, mode="a") h5dset = h5file.create_dataset( - name='test', - data=np.arange(1000).reshape((10,100)), - compression='gzip', + name="test", + data=np.arange(1000).reshape((10, 100)), + compression="gzip", compression_opts=6, shuffle=True, fillvalue=100, - chunks=(5,10)) + chunks=(5, 10), + ) re_zarrdataio = ZarrDataIO.from_h5py_dataset(h5dset) # Test that all settings are being presevered when creating the ZarrDataIO object self.assertIsInstance(re_zarrdataio, ZarrDataIO) self.assertEqual(re_zarrdataio.data, h5dset) self.assertEqual(re_zarrdataio.fillvalue, 100) - self.assertEqual(re_zarrdataio.chunks, (5,10)) - self.assertEqual(len(re_zarrdataio.io_settings['filters']), 2) - self.assertIsInstance(re_zarrdataio.io_settings['filters'][0], numcodecs.Shuffle) - self.assertIsInstance(re_zarrdataio.io_settings['filters'][1], numcodecs.Zlib) + self.assertEqual(re_zarrdataio.chunks, (5, 10)) + self.assertEqual(len(re_zarrdataio.io_settings["filters"]), 2) + self.assertIsInstance(re_zarrdataio.io_settings["filters"][0], numcodecs.Shuffle) + self.assertIsInstance(re_zarrdataio.io_settings["filters"][1], numcodecs.Zlib) # Close the HDF5 file h5file.close() @@ -223,15 +241,16 @@ def test_from_h5py_dataset_bytes_fillvalue(self): Test ZarrDataIO.from_h5py_dataset with a fillvalue that is in bytes, which needs to be handled separately since bytes are not JSON serializable by default """ - h5file = h5py.File(self.hdf_filename, mode='a') + h5file = h5py.File(self.hdf_filename, mode="a") # print(np.arange(10, dtype=np.int8).tobytes()) h5dset = h5file.create_dataset( - name='test_str', - data=[b'hello', b'world', b'go'], - fillvalue=b'None') + name="test_str", + data=[b"hello", b"world", b"go"], + fillvalue=b"None", + ) re_zarrdataio = ZarrDataIO.from_h5py_dataset(h5dset) # Test that all settings are being presevered when creating the ZarrDataIO object self.assertIsInstance(re_zarrdataio, ZarrDataIO) - self.assertEqual(re_zarrdataio.io_settings['fill_value'], str("None")) + self.assertEqual(re_zarrdataio.io_settings["fill_value"], str("None")) # Close the HDF5 file - h5file.close() \ No newline at end of file + h5file.close() diff --git a/tests/unit/test_zarrio.py b/tests/unit/test_zarrio.py index 50eb9130..0dfa2e35 100644 --- a/tests/unit/test_zarrio.py +++ b/tests/unit/test_zarrio.py @@ -9,13 +9,15 @@ classes will then be run here with all different backends 
so that we don't need to implement the tests separately for the different backends. """ -from tests.unit.base_tests_zarrio import (BaseTestZarrWriter, - ZarrStoreTestCase, - BaseTestZarrWriteUnit, - BaseTestExportZarrToZarr) -from zarr.storage import (DirectoryStore, - NestedDirectoryStore) -from tests.unit.utils import (Baz, BazData, BazBucket, get_baz_buildmanager) + +from tests.unit.base_tests_zarrio import ( + BaseTestZarrWriter, + ZarrStoreTestCase, + BaseTestZarrWriteUnit, + BaseTestExportZarrToZarr, +) +from zarr.storage import DirectoryStore, NestedDirectoryStore +from tests.unit.utils import Baz, BazData, BazBucket, get_baz_buildmanager import zarr from hdmf_zarr.backend import ZarrIO @@ -40,6 +42,7 @@ class TestZarrWriterDefaultStore(BaseTestZarrWriter): All settings are already defined in the BaseTestZarrWriter class so we here only need to instantiate the class to run the tests. """ + pass @@ -50,6 +53,7 @@ class TestZarrWriteUnitDefaultStore(BaseTestZarrWriteUnit): All settings are already defined in the BaseTestZarrWriter class so we here only need to instantiate the class to run the tests. """ + pass @@ -60,6 +64,7 @@ class TestExportZarrToZarrDefaultStore(BaseTestExportZarrToZarr): All settings are already defined in the BaseTestZarrWriter class so we here only need to instantiate the class to run the tests. """ + pass @@ -68,6 +73,7 @@ class TestExportZarrToZarrDefaultStore(BaseTestExportZarrToZarr): ######################################### class TestZarrWriterDirectoryStore(BaseTestZarrWriter): """Test writing of builder with Zarr using a custom DirectoryStore""" + def setUp(self): super().setUp() self.store = DirectoryStore(self.store_path) @@ -75,6 +81,7 @@ def setUp(self): class TestZarrWriteUnitDirectoryStore(BaseTestZarrWriteUnit): """Unit test for individual write functions using a custom DirectoryStore""" + def setUp(self): self.store_path = "test_io.zarr" self.store = DirectoryStore(self.store_path) @@ -82,6 +89,7 @@ def setUp(self): class TestExportZarrToZarrDirectoryStore(BaseTestExportZarrToZarr): """Test exporting Zarr to Zarr using DirectoryStore""" + def setUp(self): super().setUp() self.store = [DirectoryStore(p) for p in self.store_path] @@ -92,6 +100,7 @@ def setUp(self): ######################################### class TestZarrWriterNestedDirectoryStore(BaseTestZarrWriter): """Test writing of builder with Zarr using a custom NestedDirectoryStore""" + def setUp(self): super().setUp() self.store = NestedDirectoryStore(self.store_path) @@ -99,6 +108,7 @@ def setUp(self): class TestZarrWriteUnitNestedDirectoryStore(BaseTestZarrWriteUnit): """Unit test for individual write functions using a custom NestedDirectoryStore""" + def setUp(self): self.store_path = "test_io.zarr" self.store = NestedDirectoryStore(self.store_path) @@ -106,6 +116,7 @@ def setUp(self): class TestExportZarrToZarrNestedDirectoryStore(BaseTestExportZarrToZarr): """Test exporting Zarr to Zarr using NestedDirectoryStore""" + def setUp(self): super().setUp() self.store = [NestedDirectoryStore(p) for p in self.store_path] @@ -116,6 +127,7 @@ def setUp(self): ######################################### class TestPathlib(BaseTestZarrWriter): """Test writing of builder with Zarr using a custom DirectoryStore""" + def setUp(self): super().setUp() self.store = pathlib.Path(self.store_path) @@ -128,30 +140,31 @@ class TestConsolidateMetadata(ZarrStoreTestCase): """ Tests for consolidated metadata and corresponding helper methods. 
""" + def test_get_store_path_shallow(self): self.create_zarr(consolidate_metadata=False) store = DirectoryStore(self.store_path) path = ZarrIO._ZarrIO__get_store_path(store) - expected_path = os.path.abspath('test_io.zarr') + expected_path = os.path.abspath("test_io.zarr") self.assertEqual(path, expected_path) def test_get_store_path_deep(self): self.create_zarr() - zarr_obj = zarr.open_consolidated(self.store_path, mode='r') + zarr_obj = zarr.open_consolidated(self.store_path, mode="r") store = zarr_obj.store path = ZarrIO._ZarrIO__get_store_path(store) - expected_path = os.path.abspath('test_io.zarr') + expected_path = os.path.abspath("test_io.zarr") self.assertEqual(path, expected_path) def test_force_open_without_consolidated(self): """Test that read-mode -r forces a regular read with mode r""" self.create_zarr(consolidate_metadata=True) # Confirm that opening the file 'r' mode indeed uses the consolidated metadata - with ZarrIO(self.store_path, mode='r') as read_io: + with ZarrIO(self.store_path, mode="r") as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.ConsolidatedMetadataStore) # Confirm that opening the file IN 'r-' mode indeed forces a regular open without consolidated metadata - with ZarrIO(self.store_path, mode='r-') as read_io: + with ZarrIO(self.store_path, mode="r-") as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.DirectoryStore) @@ -161,17 +174,18 @@ def test_force_open_without_consolidated_fails(self): is used to force read without consolidated metadata. """ self.create_zarr(consolidate_metadata=True) - with ZarrIO(self.store_path, mode='r') as read_io: + with ZarrIO(self.store_path, mode="r") as read_io: # Check that using 'r-' fails - msg = 'Mode r- not allowed for reading with consolidated metadata' + msg = "Mode r- not allowed for reading with consolidated metadata" with self.assertRaisesWith(ValueError, msg): - read_io._ZarrIO__open_file_consolidated(store=self.store_path, mode='r-') + read_io._ZarrIO__open_file_consolidated(store=self.store_path, mode="r-") # Check that using 'r' does not fail try: - read_io._ZarrIO__open_file_consolidated(store=self.store_path, mode='r') + read_io._ZarrIO__open_file_consolidated(store=self.store_path, mode="r") except ValueError as e: self.fail("ZarrIO.__open_file_consolidated raised an unexpected ValueError: {}".format(e)) + class TestOverwriteExistingFile(ZarrStoreTestCase): def test_force_overwrite_when_file_exists(self): """ @@ -183,7 +197,7 @@ def test_force_overwrite_when_file_exists(self): file.write("Just a test file used in TestOverwriteExistingFile") # try to create a Zarr file at the same location (i.e., self.store) as the # test text file to force overwriting the existing file. - self.create_zarr(force_overwrite=True, mode='w') + self.create_zarr(force_overwrite=True, mode="w") def test_force_overwrite_when_dir_exists(self): """ @@ -193,7 +207,7 @@ def test_force_overwrite_when_dir_exists(self): # create a Zarr file self.create_zarr() # try to overwrite the existing Zarr file - self.create_zarr(force_overwrite=True, mode='w') + self.create_zarr(force_overwrite=True, mode="w") class TestDimensionLabels(BuildDatasetShapeMixin): @@ -205,33 +219,34 @@ class TestDimensionLabels(BuildDatasetShapeMixin): ii) Create and write a BarDataHolder with a BarData. iii) Read and check that the _ARRAY_DIMENSIONS attribute is set. 
""" + def tearDown(self): shutil.rmtree(self.store) def get_base_shape_dims(self): - return [None, None], ['a', 'b'] + return [None, None], ["a", "b"] def get_dataset_inc_spec(self): dataset_inc_spec = DatasetSpec( - doc='A BarData', - data_type_inc='BarData', - quantity='*', + doc="A BarData", + data_type_inc="BarData", + quantity="*", ) return dataset_inc_spec def test_build(self): - bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string') + bar_data_inst = BarData(name="my_bar", data=[[1, 2, 3], [4, 5, 6]], attr1="a string") bar_data_holder_inst = BarDataHolder( - name='my_bar_holder', + name="my_bar_holder", bar_datas=[bar_data_inst], ) - with ZarrIO(self.store, manager=self.manager, mode='w') as io: + with ZarrIO(self.store, manager=self.manager, mode="w") as io: io.write(bar_data_holder_inst) - with ZarrIO(self.store, manager=self.manager, mode='r') as io: + with ZarrIO(self.store, manager=self.manager, mode="r") as io: file = io.read() - self.assertEqual(file.bar_datas[0].data.attrs['_ARRAY_DIMENSIONS'], ['a', 'b']) + self.assertEqual(file.bar_datas[0].data.attrs["_ARRAY_DIMENSIONS"], ["a", "b"]) class TestDatasetofReferences(ZarrStoreTestCase): @@ -243,7 +258,7 @@ def tearDown(self): """ Remove all files and folders defined by self.store_path """ - paths = self.store_path if isinstance(self.store_path, list) else [self.store_path, ] + paths = self.store_path if isinstance(self.store_path, list) else [self.store_path] for path in paths: if os.path.exists(path): if os.path.isdir(path): @@ -258,17 +273,17 @@ def test_append_references(self): num_bazs = 10 bazs = [] # set up dataset of references for i in range(num_bazs): - bazs.append(Baz(name='baz%d' % i)) - baz_data = BazData(name='baz_data', data=bazs) + bazs.append(Baz(name="baz%d" % i)) + baz_data = BazData(name="baz_data", data=bazs) container = BazBucket(bazs=bazs, baz_data=baz_data) manager = get_baz_buildmanager() - with ZarrIO(self.store, manager=manager, mode='w') as writer: + with ZarrIO(self.store, manager=manager, mode="w") as writer: writer.write(container=container) - with ZarrIO(self.store, manager=manager, mode='a') as append_io: + with ZarrIO(self.store, manager=manager, mode="a") as append_io: read_container = append_io.read() - new_baz = Baz(name='new') + new_baz = Baz(name="new") read_container.add_baz(new_baz) DoR = read_container.baz_data.data @@ -276,7 +291,7 @@ def test_append_references(self): append_io.write(read_container) - with ZarrIO(self.store, manager=manager, mode='r') as append_io: + with ZarrIO(self.store, manager=manager, mode="r") as append_io: read_container = append_io.read() self.assertEqual(len(read_container.baz_data.data), 11) self.assertIs(read_container.baz_data.data[10], read_container.bazs["new"]) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index de343acd..faffbb4f 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -3,17 +3,27 @@ from copy import copy, deepcopy from abc import ABCMeta, abstractmethod -from hdmf.build import (ObjectMapper, TypeMap, BuildManager) -from hdmf.container import (Container, Data) -from hdmf.spec import (GroupSpec, DatasetSpec, AttributeSpec, LinkSpec, - RefSpec, DtypeSpec, NamespaceCatalog, SpecCatalog, - SpecNamespace, NamespaceBuilder, Spec) -from hdmf.spec.spec import (ZERO_OR_MANY, ONE_OR_MANY, ZERO_OR_ONE) -from hdmf.utils import (docval, getargs, get_docval) +from hdmf.build import ObjectMapper, TypeMap, BuildManager +from hdmf.container import Container, Data +from hdmf.spec import ( + GroupSpec, + 
DatasetSpec, + AttributeSpec, + LinkSpec, + RefSpec, + DtypeSpec, + NamespaceCatalog, + SpecCatalog, + SpecNamespace, + NamespaceBuilder, + Spec, +) +from hdmf.spec.spec import ZERO_OR_MANY, ONE_OR_MANY, ZERO_OR_ONE +from hdmf.utils import docval, getargs, get_docval from hdmf.testing import TestCase from hdmf_zarr.backend import ROOT_NAME -CORE_NAMESPACE = 'test_core' +CORE_NAMESPACE = "test_core" class CacheSpecTestHelper(object): @@ -23,8 +33,8 @@ def get_types(catalog): types = set() for ns_name in catalog.namespaces: ns = catalog.get_namespace(ns_name) - for source in ns['schema']: - types.update(catalog.get_types(source['source'])) + for source in ns["schema"]: + types.update(catalog.get_types(source["source"])) return types @@ -39,8 +49,9 @@ def get_temp_filepath(): def check_s3fs_ffspec_installed(): """Check if s3fs and ffspec are installed required for streaming access from S3""" try: - import s3fs # noqa F401 + import s3fs # noqa F401 import fsspec # noqa F401 + return True except ImportError: return False @@ -51,13 +62,15 @@ def check_s3fs_ffspec_installed(): ########################################### class Foo(Container): - @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, - {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'some data'}, - {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, - {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, - {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}) + @docval( + {"name": "name", "type": str, "doc": "the name of this Foo"}, + {"name": "my_data", "type": ("array_data", "data"), "doc": "some data"}, + {"name": "attr1", "type": str, "doc": "an attribute"}, + {"name": "attr2", "type": int, "doc": "another attribute"}, + {"name": "attr3", "type": float, "doc": "a third attribute", "default": 3.14}, + ) def __init__(self, **kwargs): - name, my_data, attr1, attr2, attr3 = getargs('name', 'my_data', 'attr1', 'attr2', 'attr3', kwargs) + name, my_data, attr1, attr2, attr3 = getargs("name", "my_data", "attr1", "attr2", "attr3", kwargs) super().__init__(name=name) self.__data = my_data self.__attr1 = attr1 @@ -65,12 +78,12 @@ def __init__(self, **kwargs): self.__attr3 = attr3 def __eq__(self, other): - attrs = ('name', 'my_data', 'attr1', 'attr2', 'attr3') + attrs = ("name", "my_data", "attr1", "attr2", "attr3") return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): - attrs = ('name', 'my_data', 'attr1', 'attr2', 'attr3') - return '<' + ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) + '>' + attrs = ("name", "my_data", "attr1", "attr2", "attr3") + return "<" + ",".join("%s=%s" % (a, getattr(self, a)) for a in attrs) + ">" @property def my_data(self): @@ -94,10 +107,12 @@ def __hash__(self): class FooBucket(Container): - @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'}, - {'name': 'foos', 'type': list, 'doc': 'the Foo objects in this bucket', 'default': list()}) + @docval( + {"name": "name", "type": str, "doc": "the name of this bucket"}, + {"name": "foos", "type": list, "doc": "the Foo objects in this bucket", "default": list()}, + ) def __init__(self, **kwargs): - name, foos = getargs('name', 'foos', kwargs) + name, foos = getargs("name", "foos", kwargs) super().__init__(name=name) self.__foos = {f.name: f for f in foos} # note: collections of groups are unordered in HDF5 for f in foos: @@ -107,7 +122,7 @@ def __eq__(self, other): return self.name == other.name and self.foos == other.foos def __str__(self): - return 
'name=%s, foos=%s' % (self.name, self.foos) + return "name=%s, foos=%s" % (self.name, self.foos) @property def foos(self): @@ -126,14 +141,16 @@ class FooFile(Container): and should be reset to 'root' when use is finished to avoid potential cross-talk between tests. """ - @docval({'name': 'buckets', 'type': list, 'doc': 'the FooBuckets in this file', 'default': list()}, - {'name': 'foo_link', 'type': Foo, 'doc': 'an optional linked Foo', 'default': None}, - {'name': 'foofile_data', 'type': 'array_data', 'doc': 'an optional dataset', 'default': None}, - {'name': 'foo_ref_attr', 'type': Foo, 'doc': 'a reference Foo', 'default': None}, - ) + @docval( + {"name": "buckets", "type": list, "doc": "the FooBuckets in this file", "default": list()}, + {"name": "foo_link", "type": Foo, "doc": "an optional linked Foo", "default": None}, + {"name": "foofile_data", "type": "array_data", "doc": "an optional dataset", "default": None}, + {"name": "foo_ref_attr", "type": Foo, "doc": "a reference Foo", "default": None}, + ) def __init__(self, **kwargs): - buckets, foo_link, foofile_data, foo_ref_attr = getargs('buckets', 'foo_link', 'foofile_data', - 'foo_ref_attr', kwargs) + buckets, foo_link, foofile_data, foo_ref_attr = getargs( + "buckets", "foo_link", "foofile_data", "foo_ref_attr", kwargs + ) super().__init__(name=ROOT_NAME) # name is not used - FooFile should be the root container self.__buckets = {b.name: b for b in buckets} # note: collections of groups are unordered in HDF5 for f in buckets: @@ -143,12 +160,14 @@ def __init__(self, **kwargs): self.__foo_ref_attr = foo_ref_attr def __eq__(self, other): - return (self.buckets == other.buckets - and self.foo_link == other.foo_link - and self.foofile_data == other.foofile_data) + return ( + self.buckets == other.buckets + and self.foo_link == other.foo_link + and self.foofile_data == other.foofile_data + ) def __str__(self): - return ('buckets=%s, foo_link=%s, foofile_data=%s' % (self.buckets, self.foo_link, self.foofile_data)) + return "buckets=%s, foo_link=%s, foofile_data=%s" % (self.buckets, self.foo_link, self.foofile_data) @property def buckets(self): @@ -204,91 +223,93 @@ def get_foo_buildmanager(): :return: """ - foo_spec = GroupSpec('A test group specification with a data type', - data_type_def='Foo', - datasets=[DatasetSpec('an example dataset', - 'int', - name='my_data', - attributes=[AttributeSpec('attr2', - 'an example integer attribute', - 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), - AttributeSpec('attr3', 'an example float attribute', 'float')]) - - tmp_spec = GroupSpec('A subgroup for Foos', - name='foo_holder', - groups=[GroupSpec('the Foos in this bucket', data_type_inc='Foo', quantity=ZERO_OR_MANY)]) - - bucket_spec = GroupSpec('A test group specification for a data type containing data type', - data_type_def='FooBucket', - groups=[tmp_spec]) + foo_spec = GroupSpec( + "A test group specification with a data type", + data_type_def="Foo", + datasets=[ + DatasetSpec( + "an example dataset", + "int", + name="my_data", + attributes=[AttributeSpec("attr2", "an example integer attribute", "int")], + ) + ], + attributes=[ + AttributeSpec("attr1", "an example string attribute", "text"), + AttributeSpec("attr3", "an example float attribute", "float"), + ], + ) + + tmp_spec = GroupSpec( + "A subgroup for Foos", + name="foo_holder", + groups=[GroupSpec("the Foos in this bucket", data_type_inc="Foo", quantity=ZERO_OR_MANY)], + ) + + bucket_spec = GroupSpec( + "A test group specification for a data 
type containing data type", data_type_def="FooBucket", groups=[tmp_spec] + ) class FooMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - my_data_spec = spec.get_dataset('my_data') - self.map_spec('attr2', my_data_spec.get_attribute('attr2')) + my_data_spec = spec.get_dataset("my_data") + self.map_spec("attr2", my_data_spec.get_attribute("attr2")) class BucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - foo_holder_spec = spec.get_group('foo_holder') + foo_holder_spec = spec.get_group("foo_holder") self.unmap(foo_holder_spec) - foo_spec = foo_holder_spec.get_data_type('Foo') - self.map_spec('foos', foo_spec) - - file_links_spec = GroupSpec('Foo link group', - name='links', - links=[LinkSpec('Foo link', - name='foo_link', - target_type='Foo', - quantity=ZERO_OR_ONE)] - ) - - file_spec = GroupSpec("A file of Foos contained in FooBuckets", - data_type_def='FooFile', - groups=[GroupSpec('Holds the FooBuckets', - name='buckets', - groups=[GroupSpec("One or more FooBuckets", - data_type_inc='FooBucket', - quantity=ZERO_OR_MANY)]), - file_links_spec], - datasets=[DatasetSpec('Foo data', - name='foofile_data', - dtype='int', - quantity=ZERO_OR_ONE)], - attributes=[AttributeSpec(doc='Foo ref attr', - name='foo_ref_attr', - dtype=RefSpec('Foo', 'object'), - required=False)], - ) + foo_spec = foo_holder_spec.get_data_type("Foo") + self.map_spec("foos", foo_spec) + + file_links_spec = GroupSpec( + "Foo link group", + name="links", + links=[LinkSpec("Foo link", name="foo_link", target_type="Foo", quantity=ZERO_OR_ONE)], + ) + + file_spec = GroupSpec( + "A file of Foos contained in FooBuckets", + data_type_def="FooFile", + groups=[ + GroupSpec( + "Holds the FooBuckets", + name="buckets", + groups=[GroupSpec("One or more FooBuckets", data_type_inc="FooBucket", quantity=ZERO_OR_MANY)], + ), + file_links_spec, + ], + datasets=[DatasetSpec("Foo data", name="foofile_data", dtype="int", quantity=ZERO_OR_ONE)], + attributes=[ + AttributeSpec(doc="Foo ref attr", name="foo_ref_attr", dtype=RefSpec("Foo", "object"), required=False) + ], + ) class FileMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - bucket_spec = spec.get_group('buckets').get_data_type('FooBucket') - self.map_spec('buckets', bucket_spec) - self.unmap(spec.get_group('links')) - foo_link_spec = spec.get_group('links').get_link('foo_link') - self.map_spec('foo_link', foo_link_spec) + bucket_spec = spec.get_group("buckets").get_data_type("FooBucket") + self.map_spec("buckets", bucket_spec) + self.unmap(spec.get_group("links")) + foo_link_spec = spec.get_group("links").get_link("foo_link") + self.map_spec("foo_link", foo_link_spec) spec_catalog = SpecCatalog() - spec_catalog.register_spec(foo_spec, 'test.yaml') - spec_catalog.register_spec(bucket_spec, 'test.yaml') - spec_catalog.register_spec(file_spec, 'test.yaml') + spec_catalog.register_spec(foo_spec, "test.yaml") + spec_catalog.register_spec(bucket_spec, "test.yaml") + spec_catalog.register_spec(file_spec, "test.yaml") namespace = SpecNamespace( - 'a test namespace', - CORE_NAMESPACE, - [{'source': 'test.yaml'}], - version='0.1.0', - catalog=spec_catalog) + "a test namespace", CORE_NAMESPACE, [{"source": "test.yaml"}], version="0.1.0", catalog=spec_catalog + ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) - type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) - type_map.register_container_type(CORE_NAMESPACE, 'FooBucket', 
FooBucket) - type_map.register_container_type(CORE_NAMESPACE, 'FooFile', FooFile) + type_map.register_container_type(CORE_NAMESPACE, "Foo", Foo) + type_map.register_container_type(CORE_NAMESPACE, "FooBucket", FooBucket) + type_map.register_container_type(CORE_NAMESPACE, "FooFile", FooFile) type_map.register_map(Foo, FooMapper) type_map.register_map(FooBucket, BucketMapper) @@ -317,12 +338,14 @@ class BazCpdData(Data): class BazBucket(Container): - @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket', 'default': ROOT_NAME}, - {'name': 'bazs', 'type': list, 'doc': 'the Baz objects in this bucket'}, - {'name': 'baz_data', 'type': BazData, 'doc': 'dataset of Baz references', 'default': None}, - {'name': 'baz_cpd_data', 'type': BazCpdData, 'doc': 'dataset of Baz references', 'default': None}) + @docval( + {"name": "name", "type": str, "doc": "the name of this bucket", "default": ROOT_NAME}, + {"name": "bazs", "type": list, "doc": "the Baz objects in this bucket"}, + {"name": "baz_data", "type": BazData, "doc": "dataset of Baz references", "default": None}, + {"name": "baz_cpd_data", "type": BazCpdData, "doc": "dataset of Baz references", "default": None}, + ) def __init__(self, **kwargs): - name, bazs, baz_data, baz_cpd_data = getargs('name', 'bazs', 'baz_data', 'baz_cpd_data', kwargs) + name, bazs, baz_data, baz_cpd_data = getargs("name", "bazs", "baz_data", "baz_cpd_data", kwargs) super().__init__(name=name) self.__bazs = {b.name: b for b in bazs} # note: collections of groups are unordered in HDF5 for b in bazs: @@ -358,70 +381,75 @@ def remove_baz(self, baz_name): def get_baz_buildmanager(): baz_spec = GroupSpec( - doc='A test group specification with a data type', - data_type_def='Baz', + doc="A test group specification with a data type", + data_type_def="Baz", ) baz_data_spec = DatasetSpec( - doc='A test dataset of references specification with a data type', - name='baz_data', - data_type_def='BazData', - dtype=RefSpec('Baz', 'object'), + doc="A test dataset of references specification with a data type", + name="baz_data", + data_type_def="BazData", + dtype=RefSpec("Baz", "object"), shape=[None], ) baz_cpd_data_spec = DatasetSpec( - doc='A test compound dataset with references specification with a data type', - name='baz_cpd_data', - data_type_def='BazCpdData', - dtype=[DtypeSpec(name='part1', doc='doc', dtype='int'), - DtypeSpec(name='part2', doc='doc', dtype=RefSpec('Baz', 'object'))], + doc="A test compound dataset with references specification with a data type", + name="baz_cpd_data", + data_type_def="BazCpdData", + dtype=[ + DtypeSpec(name="part1", doc="doc", dtype="int"), + DtypeSpec(name="part2", doc="doc", dtype=RefSpec("Baz", "object")), + ], shape=[None], ) baz_holder_spec = GroupSpec( - doc='group of bazs', - name='bazs', - groups=[GroupSpec(doc='Baz', data_type_inc='Baz', quantity=ONE_OR_MANY)], + doc="group of bazs", + name="bazs", + groups=[GroupSpec(doc="Baz", data_type_inc="Baz", quantity=ONE_OR_MANY)], ) baz_bucket_spec = GroupSpec( - doc='A test group specification for a data type containing data type', - data_type_def='BazBucket', + doc="A test group specification for a data type containing data type", + data_type_def="BazBucket", groups=[baz_holder_spec], - datasets=[DatasetSpec(doc='doc', data_type_inc='BazData', quantity=ZERO_OR_ONE), - DatasetSpec(doc='doc', data_type_inc='BazCpdData', quantity=ZERO_OR_ONE)], + datasets=[ + DatasetSpec(doc="doc", data_type_inc="BazData", quantity=ZERO_OR_ONE), + DatasetSpec(doc="doc", data_type_inc="BazCpdData", 
quantity=ZERO_OR_ONE), + ], ) spec_catalog = SpecCatalog() - spec_catalog.register_spec(baz_spec, 'test.yaml') - spec_catalog.register_spec(baz_data_spec, 'test.yaml') - spec_catalog.register_spec(baz_cpd_data_spec, 'test.yaml') - spec_catalog.register_spec(baz_bucket_spec, 'test.yaml') + spec_catalog.register_spec(baz_spec, "test.yaml") + spec_catalog.register_spec(baz_data_spec, "test.yaml") + spec_catalog.register_spec(baz_cpd_data_spec, "test.yaml") + spec_catalog.register_spec(baz_bucket_spec, "test.yaml") namespace = SpecNamespace( - 'a test namespace', + "a test namespace", CORE_NAMESPACE, - [{'source': 'test.yaml'}], - version='0.1.0', - catalog=spec_catalog) + [{"source": "test.yaml"}], + version="0.1.0", + catalog=spec_catalog, + ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) - type_map.register_container_type(CORE_NAMESPACE, 'Baz', Baz) - type_map.register_container_type(CORE_NAMESPACE, 'BazData', BazData) - type_map.register_container_type(CORE_NAMESPACE, 'BazCpdData', BazCpdData) - type_map.register_container_type(CORE_NAMESPACE, 'BazBucket', BazBucket) + type_map.register_container_type(CORE_NAMESPACE, "Baz", Baz) + type_map.register_container_type(CORE_NAMESPACE, "BazData", BazData) + type_map.register_container_type(CORE_NAMESPACE, "BazCpdData", BazCpdData) + type_map.register_container_type(CORE_NAMESPACE, "BazBucket", BazBucket) class BazBucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - baz_holder_spec = spec.get_group('bazs') + baz_holder_spec = spec.get_group("bazs") self.unmap(baz_holder_spec) - baz_spec = baz_holder_spec.get_data_type('Baz') - self.map_spec('bazs', baz_spec) + baz_spec = baz_holder_spec.get_data_type("Baz") + self.map_spec("bazs", baz_spec) type_map.register_map(BazBucket, BazBucketMapper) @@ -438,15 +466,15 @@ def create_test_type_map(specs, container_classes, mappers=None): :return: the constructed TypeMap """ spec_catalog = SpecCatalog() - schema_file = 'test.yaml' + schema_file = "test.yaml" for s in specs: spec_catalog.register_spec(s, schema_file) namespace = SpecNamespace( - doc='a test namespace', + doc="a test namespace", name=CORE_NAMESPACE, - schema=[{'source': schema_file}], - version='0.1.0', - catalog=spec_catalog + schema=[{"source": schema_file}], + version="0.1.0", + catalog=spec_catalog, ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) @@ -475,11 +503,11 @@ def create_load_namespace_yaml(namespace_name, specs, output_dir, incl_types, ty """ ns_builder = NamespaceBuilder( name=namespace_name, - doc='a test namespace', - version='0.1.0', + doc="a test namespace", + version="0.1.0", ) - ns_filename = ns_builder.name + '.namespace.yaml' - ext_filename = ns_builder.name + '.extensions.yaml' + ns_filename = ns_builder.name + ".namespace.yaml" + ext_filename = ns_builder.name + ".extensions.yaml" for ns, types in incl_types.items(): if types is None: # include all types @@ -498,39 +526,52 @@ def create_load_namespace_yaml(namespace_name, specs, output_dir, incl_types, ty # ##### custom spec classes ##### + def swap_inc_def(cls, custom_cls): args = get_docval(cls.__init__) ret = list() for arg in args: - if arg['name'] == 'data_type_def': - ret.append({'name': 'my_data_type_def', 'type': str, - 'doc': 'the NWB data type this spec defines', 'default': None}) - elif arg['name'] == 'data_type_inc': - ret.append({'name': 'my_data_type_inc', 'type': (custom_cls, str), - 'doc': 
'the NWB data type this spec includes', 'default': None}) + if arg["name"] == "data_type_def": + ret.append( + { + "name": "my_data_type_def", + "type": str, + "doc": "the NWB data type this spec defines", + "default": None, + } + ) + elif arg["name"] == "data_type_inc": + ret.append( + { + "name": "my_data_type_inc", + "type": (custom_cls, str), + "doc": "the NWB data type this spec includes", + "default": None, + } + ) else: ret.append(copy(arg)) return ret class BaseStorageOverride: - __type_key = 'my_data_type' - __inc_key = 'my_data_type_inc' - __def_key = 'my_data_type_def' + __type_key = "my_data_type" + __inc_key = "my_data_type_inc" + __def_key = "my_data_type_def" @classmethod def type_key(cls): - ''' Get the key used to store data type on an instance''' + """Get the key used to store data type on an instance""" return cls.__type_key @classmethod def inc_key(cls): - ''' Get the key used to define a data_type include.''' + """Get the key used to define a data_type include.""" return cls.__inc_key @classmethod def def_key(cls): - ''' Get the key used to define a data_type definition.''' + """Get the key used to define a data_type definition.""" return cls.__def_key @classmethod @@ -556,7 +597,7 @@ def _translate_kwargs(cls, kwargs): class CustomGroupSpec(BaseStorageOverride, GroupSpec): - @docval(*deepcopy(swap_inc_def(GroupSpec, 'CustomGroupSpec'))) + @docval(*deepcopy(swap_inc_def(GroupSpec, "CustomGroupSpec"))) def __init__(self, **kwargs): kwargs = self._translate_kwargs(kwargs) super().__init__(**kwargs) @@ -565,15 +606,15 @@ def __init__(self, **kwargs): def dataset_spec_cls(cls): return CustomDatasetSpec - @docval(*deepcopy(swap_inc_def(GroupSpec, 'CustomGroupSpec'))) + @docval(*deepcopy(swap_inc_def(GroupSpec, "CustomGroupSpec"))) def add_group(self, **kwargs): spec = CustomGroupSpec(**kwargs) self.set_group(spec) return spec - @docval(*deepcopy(swap_inc_def(DatasetSpec, 'CustomDatasetSpec'))) + @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) def add_dataset(self, **kwargs): - ''' Add a new specification for a subgroup to this group specification ''' + """Add a new specification for a subgroup to this group specification""" spec = CustomDatasetSpec(**kwargs) self.set_dataset(spec) return spec @@ -581,14 +622,14 @@ def add_dataset(self, **kwargs): class CustomDatasetSpec(BaseStorageOverride, DatasetSpec): - @docval(*deepcopy(swap_inc_def(DatasetSpec, 'CustomDatasetSpec'))) + @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) def __init__(self, **kwargs): kwargs = self._translate_kwargs(kwargs) super().__init__(**kwargs) class CustomSpecNamespace(SpecNamespace): - __types_key = 'my_data_types' + __types_key = "my_data_types" @classmethod def types_key(cls): @@ -597,21 +638,23 @@ def types_key(cls): class BarData(Data): - @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarData'}, - {'name': 'data', 'type': ('data', 'array_data'), 'doc': 'the data'}, - {'name': 'attr1', 'type': str, 'doc': 'a string attribute', 'default': None}, - {'name': 'attr2', 'type': 'int', 'doc': 'an int attribute', 'default': None}, - {'name': 'ext_attr', 'type': bool, 'doc': 'a boolean attribute', 'default': True}) + @docval( + {"name": "name", "type": str, "doc": "the name of this BarData"}, + {"name": "data", "type": ("data", "array_data"), "doc": "the data"}, + {"name": "attr1", "type": str, "doc": "a string attribute", "default": None}, + {"name": "attr2", "type": "int", "doc": "an int attribute", "default": None}, + {"name": "ext_attr", "type": 
bool, "doc": "a boolean attribute", "default": True}, + ) def __init__(self, **kwargs): - name, data, attr1, attr2, ext_attr = getargs('name', 'data', 'attr1', 'attr2', 'ext_attr', kwargs) + name, data, attr1, attr2, ext_attr = getargs("name", "data", "attr1", "attr2", "ext_attr", kwargs) super().__init__(name=name, data=data) self.__attr1 = attr1 self.__attr2 = attr2 - self.__ext_attr = kwargs['ext_attr'] + self.__ext_attr = kwargs["ext_attr"] @property def data_type(self): - return 'BarData' + return "BarData" @property def attr1(self): @@ -628,10 +671,12 @@ def ext_attr(self): class BarDataHolder(Container): - @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarDataHolder'}, - {'name': 'bar_datas', 'type': ('data', 'array_data'), 'doc': 'bar_datas', 'default': list()}) + @docval( + {"name": "name", "type": str, "doc": "the name of this BarDataHolder"}, + {"name": "bar_datas", "type": ("data", "array_data"), "doc": "bar_datas", "default": list()}, + ) def __init__(self, **kwargs): - name, bar_datas = getargs('name', 'bar_datas', kwargs) + name, bar_datas = getargs("name", "bar_datas", kwargs) super().__init__(name=name) self.__bar_datas = bar_datas for b in bar_datas: @@ -640,7 +685,7 @@ def __init__(self, **kwargs): @property def data_type(self): - return 'BarDataHolder' + return "BarDataHolder" @property def bar_datas(self): @@ -649,17 +694,19 @@ def bar_datas(self): class ExtBarDataMapper(ObjectMapper): - @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"}, - {"name": "container", "type": BarData, "doc": "the container to get the attribute value from"}, - {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"}, - returns='the value of the attribute') + @docval( + {"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"}, + {"name": "container", "type": BarData, "doc": "the container to get the attribute value from"}, + {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"}, + returns="the value of the attribute", + ) def get_attr_value(self, **kwargs): - ''' Get the value of the attribute corresponding to this spec from the given container ''' - spec, container, manager = getargs('spec', 'container', 'manager', kwargs) + """Get the value of the attribute corresponding to this spec from the given container""" + spec, container, manager = getargs("spec", "container", "manager", kwargs) # handle custom mapping of field 'ext_attr' within container # BardataHolder/BarData -> spec BarDataHolder/BarData.ext_attr if isinstance(container.parent, BarDataHolder): - if spec.name == 'ext_attr': + if spec.name == "ext_attr": return container.ext_attr return super().get_attr_value(**kwargs) @@ -670,20 +717,20 @@ def setUp(self): self.store = "tests/unit/test_io.zarr" self.set_up_specs() spec_catalog = SpecCatalog() - spec_catalog.register_spec(self.bar_data_spec, 'test.yaml') - spec_catalog.register_spec(self.bar_data_holder_spec, 'test.yaml') + spec_catalog.register_spec(self.bar_data_spec, "test.yaml") + spec_catalog.register_spec(self.bar_data_holder_spec, "test.yaml") namespace = SpecNamespace( - doc='a test namespace', + doc="a test namespace", name=CORE_NAMESPACE, - schema=[{'source': 'test.yaml'}], - version='0.1.0', - catalog=spec_catalog + schema=[{"source": "test.yaml"}], + version="0.1.0", + catalog=spec_catalog, ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = 
TypeMap(namespace_catalog) - type_map.register_container_type(CORE_NAMESPACE, 'BarData', BarData) - type_map.register_container_type(CORE_NAMESPACE, 'BarDataHolder', BarDataHolder) + type_map.register_container_type(CORE_NAMESPACE, "BarData", BarData) + type_map.register_container_type(CORE_NAMESPACE, "BarDataHolder", BarDataHolder) type_map.register_map(BarData, ExtBarDataMapper) type_map.register_map(BarDataHolder, ObjectMapper) self.manager = BuildManager(type_map) @@ -691,15 +738,15 @@ def setUp(self): def set_up_specs(self): shape, dims = self.get_base_shape_dims() self.bar_data_spec = DatasetSpec( - doc='A test dataset specification with a data type', - data_type_def='BarData', - dtype='int', + doc="A test dataset specification with a data type", + data_type_def="BarData", + dtype="int", shape=shape, dims=dims, ) self.bar_data_holder_spec = GroupSpec( - doc='A container of multiple extended BarData objects', - data_type_def='BarDataHolder', + doc="A container of multiple extended BarData objects", + data_type_def="BarDataHolder", datasets=[self.get_dataset_inc_spec()], ) From 5ac9d279c670a0aaf632dddc55970d3b347f8b58 Mon Sep 17 00:00:00 2001 From: rly Date: Wed, 18 Dec 2024 11:49:58 -0800 Subject: [PATCH 23/23] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 639958cf..453504a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Remove support for python 3.8 and added testing for Python 3.13. @mavaylon1 [#240](https://github.com/hdmf-dev/hdmf-zarr/pull/240) * Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. @oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226) * Updated optional dependency groups in `pyproject.toml` and GitHub Actions workflows. @rly, @mavaylon1 [#239](https://github.com/hdmf-dev/hdmf-zarr/pull/239) +* Applied black code formatter. @rly [#247](https://github.com/hdmf-dev/hdmf-zarr/pull/247) ### Bug Fixes * Fix reading of cached specs and caching of specs during export. @rly [#232](https://github.com/hdmf-dev/hdmf-zarr/pull/232)
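
For context on the changelog entries above, here is a minimal illustrative sketch (not part of the patch) of how the new `NWBZarrIO.read_nwb` convenience method and the consolidated-metadata read modes exercised in `TestConsolidateMetadata` might be used. The file path is hypothetical, and `read_nwb` is assumed to accept the store path as its argument, as its description suggests.

```python
# Illustrative sketch only (not part of this patch). Assumes an existing
# Zarr-backed NWB store at a hypothetical path.
from hdmf_zarr.nwb import NWBZarrIO

zarr_path = "example_file.nwb.zarr"  # hypothetical path

# Convenience one-liner mentioned in the changelog; assumed to take the path
# of the Zarr store and return the read NWB file container.
nwbfile = NWBZarrIO.read_nwb(path=zarr_path)

# Equivalent lower-level read. Per TestConsolidateMetadata above, mode="r"
# uses consolidated metadata when available, while mode="r-" forces a
# regular (non-consolidated) open.
with NWBZarrIO(zarr_path, mode="r") as io:
    nwbfile = io.read()
```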