Merge branch 'dev' into internal-link-checking
rly authored Feb 5, 2024
2 parents 89bcaf8 + dd6baaa commit 0a5a4f5
Showing 19 changed files with 389 additions and 72 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/run_coverage.yml
@@ -78,8 +78,10 @@ jobs:
python -m coverage report -m
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v4
with:
flags: integration
files: coverage.xml
fail_ci_if_error: true
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
2 changes: 1 addition & 1 deletion .readthedocs.yaml
@@ -8,7 +8,7 @@ version: 2
build:
os: ubuntu-20.04
tools:
python: '3.8'
python: '3.11'

# Build documentation in the docs/ directory with Sphinx
sphinx:
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@
- Expose `starting_time` in `mock_ElectricalSeries`. @h-mayorquin [#1805](https://github.com/NeurodataWithoutBorders/pynwb/pull/1805)
- Enhance `get_data_in_units()` to work with objects that have a `channel_conversion` attribute like the `ElectricalSeries`. @h-mayorquin [#1806](https://github.com/NeurodataWithoutBorders/pynwb/pull/1806)
- Refactor validation CLI tests to use `{sys.executable} -m coverage` to use the same Python version and run correctly on Debian systems. @yarikoptic [#1811](https://github.com/NeurodataWithoutBorders/pynwb/pull/1811)
- Fixed tests to address newly caught validation errors. @rly [#1839](https://github.com/NeurodataWithoutBorders/pynwb/pull/1839)

### Bug fixes
- Fix bug where namespaces were loaded in "w-" mode. @h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795)
@@ -20,6 +21,7 @@
- Add RemFile to streaming tutorial. @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761)
- Fix typos and improve clarity throughout tutorials. @zm711 [#1825](https://github.com/NeurodataWithoutBorders/pynwb/pull/1825)
- Fix internal links in docstrings and tutorials. @stephprince [#1827](https://github.com/NeurodataWithoutBorders/pynwb/pull/1827)
- Add Zarr IO tutorial. @bendichter [#1834](https://github.com/NeurodataWithoutBorders/pynwb/pull/1834)

## PyNWB 2.5.0 (August 18, 2023)

161 changes: 161 additions & 0 deletions docs/gallery/advanced_io/plot_editing.py
@@ -0,0 +1,161 @@
"""
.. _editing:

Editing NWB files
=================

This tutorial demonstrates how to edit NWB files in-place to make small changes to
existing containers. To add or remove containers from an NWB file, see
:ref:`modifying_data`. How and whether it is possible to edit an NWB file depends on the
storage backend and the type of edit.

.. warning::

    Manually editing an existing NWB file can make the file invalid if you are not
    careful. We highly recommend making a copy before editing and running a validation
    check on the file after editing it. See :ref:`validating`.

Editing datasets
----------------

When reading an HDF5 NWB file, PyNWB exposes :py:class:`h5py.Dataset` objects, which can
be edited in place. For this to work, you must open the file in read/write mode
(``"r+"`` or ``"a"``).

First, let's create an NWB file with data:
"""
from pynwb import NWBHDF5IO, NWBFile, TimeSeries
from datetime import datetime
from dateutil.tz import tzlocal
import numpy as np

nwbfile = NWBFile(
session_description="my first synthetic recording",
identifier="EXAMPLE_ID",
session_start_time=datetime.now(tzlocal()),
session_id="LONELYMTN",
)

nwbfile.add_acquisition(
TimeSeries(
name="synthetic_timeseries",
description="Random values",
data=np.random.randn(100, 100),
unit="m",
rate=10e3,
)
)

with NWBHDF5IO("test_edit.nwb", "w") as io:
io.write(nwbfile)
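
##############################################
# Before editing, consider making a backup copy of the file, as the warning above
# recommends. A minimal sketch (the backup file name is an illustrative choice):

import shutil

shutil.copy("test_edit.nwb", "test_edit_backup.nwb")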

##############################################
# Now, let's edit the values of the dataset

with NWBHDF5IO("test_edit.nwb", "r+") as io:
nwbfile = io.read()
nwbfile.acquisition["synthetic_timeseries"].data[:10] = 0.0


##############################################
# You can edit the attributes of that dataset through the ``attrs`` attribute:

with NWBHDF5IO("test_edit.nwb", "r+") as io:
nwbfile = io.read()
nwbfile.acquisition["synthetic_timeseries"].data.attrs["unit"] = "volts"

##############################################
# Changing the shape of a dataset
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Whether it is possible to change the shape of a dataset depends on how the dataset was
# created. If the dataset was created with a flexible shape, then it is possible to
# change in-place. Creating a dataset with a flexible shape is done by specifying the
# ``maxshape`` argument of the :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` class
# constructor. Using a ``None`` value for a component of the ``maxshape`` tuple allows
# the size of the corresponding dimension to grow, so that it can later be resized to an
# arbitrary length in that dimension. Chunking is required for datasets with flexible
# shapes, so setting ``maxshape`` automatically enables chunking if it is not specified.
#
# First, let's create an NWB file with a dataset with a flexible shape:

from hdmf.backends.hdf5.h5_utils import H5DataIO

nwbfile = NWBFile(
session_description="my first synthetic recording",
identifier="EXAMPLE_ID",
session_start_time=datetime.now(tzlocal()),
session_id="LONELYMTN",
)

data_io = H5DataIO(data=np.random.randn(100, 100), maxshape=(None, 100))

nwbfile.add_acquisition(
TimeSeries(
name="synthetic_timeseries",
description="Random values",
data=data_io,
unit="m",
rate=10e3,
)
)

with NWBHDF5IO("test_edit2.nwb", "w") as io:
io.write(nwbfile)

##############################################
# The ``None`` value in the first component of ``maxshape`` means that the
# first dimension of the dataset is unlimited. By setting the second dimension
# of ``maxshape`` to ``100``, that dimension is fixed to be no larger than ``100``.
# If you do not specify a ``maxshape``, then the shape of the dataset will be fixed
# to the shape that the dataset was created with. Here, you can change the shape of
# the first dimension of this dataset.


with NWBHDF5IO("test_edit2.nwb", "r+") as io:
nwbfile = io.read()
nwbfile.acquisition["synthetic_timeseries"].data.resize((200, 100))

##############################################
# This will change the shape of the dataset in-place. If you try to change the shape of
# a dataset with a fixed shape, you will get an error.
#
# .. note::
# There are several types of dataset edits that cannot be done in-place: changing the
# shape of a dataset with a fixed shape, or changing the datatype, compression,
# chunking, max-shape, or fill-value of a dataset. For any of these, we recommend using
# the :py:meth:`pynwb.NWBHDF5IO.export` method to export the data to a new file. See
# :ref:`modifying_data` for more information.
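#
# As a minimal sketch of that export workflow (the output file name
# ``"test_edit_exported.nwb"`` is an illustrative choice, not part of the original
# tutorial), read the file and then export it to a new path:

with NWBHDF5IO("test_edit.nwb", mode="r") as read_io:
    nwbfile = read_io.read()
    # the new file is written from scratch; see :ref:`modifying_data` for how to
    # modify the data before exporting
    with NWBHDF5IO("test_edit_exported.nwb", mode="w") as export_io:
        export_io.export(src_io=read_io, nwbfile=nwbfile)

##############################################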
#
# Editing groups
# --------------
# Editing of groups is not yet supported in PyNWB.
# To edit the attributes of a group, open the file and edit it using :py:mod:`h5py`:

import h5py

with h5py.File("test_edit.nwb", "r+") as f:
f["acquisition"]["synthetic_timeseries"].attrs["description"] = "Random values in volts"

##############################################
# .. warning::
# Be careful not to edit values that will bring the file out of compliance with the
# NWB specification.
#
# Renaming groups and datasets
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Rename groups and datasets in-place using the :py:meth:`~h5py.Group.move` method. For example, to rename
# the ``"synthetic_timeseries"`` group:

with h5py.File("test_edit.nwb", "r+") as f:
f["acquisition"].move("synthetic_timeseries", "synthetic_timeseries_renamed")

##############################################
# You can use this same technique to move a group or dataset to a different location in
# the file. For example, to move the ``"synthetic_timeseries_renamed"`` group to the
# ``"analysis"`` group:

with h5py.File("test_edit.nwb", "r+") as f:
f["acquisition"].move(
"synthetic_timeseries_renamed",
"/analysis/synthetic_timeseries_renamed",
)
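
##############################################
# Finally, as the warning at the top of this tutorial recommends, it is a good idea to
# validate the file after manual edits (see :ref:`validating`). A minimal sketch using
# the PyNWB validator; note that some of the h5py edits above may be reported as
# validation issues:

from pynwb import validate

with NWBHDF5IO("test_edit.nwb", "r") as io:
    errors = validate(io=io)
    print(errors)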
98 changes: 98 additions & 0 deletions docs/gallery/advanced_io/plot_zarr_io.py
@@ -0,0 +1,98 @@
"""
Zarr IO
=======

Zarr is an alternative backend option for NWB files. It is a Python package that
provides an implementation of chunked, compressed, N-dimensional arrays. Zarr is a good
option for large datasets because, like HDF5, it is designed to store data on disk and
only load the data into memory when needed. Zarr is also a good option for parallel
computing because it supports concurrent reads and writes.

Note that the native Zarr storage format is optimized for cloud storage
(e.g., S3). For very large files, Zarr will create many files, which can lead to
issues on traditional file systems (that are not cloud object stores) due to limitations
on the number of files per directory (this affects local disk, GDrive, Dropbox, etc.).

Zarr read and write is provided by the :hdmf-zarr:`hdmf-zarr<>` package. First, create
an NWBFile using PyNWB.
"""

# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_nwbzarrio.png'


from datetime import datetime
from dateutil.tz import tzlocal

import numpy as np
from pynwb import NWBFile, TimeSeries

# Create the NWBFile. Substitute your NWBFile generation here.
nwbfile = NWBFile(
session_description="my first synthetic recording",
identifier="EXAMPLE_ID",
session_start_time=datetime.now(tzlocal()),
session_id="LONELYMTN",
)

#######################################################################################
# Dataset Configuration
# ---------------------
# Like HDF5, Zarr provides options to chunk and compress datasets. To leverage these
# features, replace all :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` with the analogous
# :py:class:`~hdmf_zarr.utils.ZarrDataIO`, which takes compressors specified by the
# :py:mod:`numcodecs` library. For example, here is a :py:class:`.TimeSeries`
# where the ``data`` dataset is compressed with a Blosc-zstd compressor:

from numcodecs import Blosc
from hdmf_zarr import ZarrDataIO

data_with_zarr_data_io = ZarrDataIO(
data=np.random.randn(100, 100),
chunks=(10, 10),
fillvalue=0,
compressor=Blosc(cname='zstd', clevel=3, shuffle=Blosc.SHUFFLE)
)

#######################################################################################
# Now add it to the :py:class:`.NWBFile`.

nwbfile.add_acquisition(
TimeSeries(
name="synthetic_timeseries",
data=data_with_zarr_data_io,
unit="m",
rate=10e3,
)
)

#######################################################################################
# Writing to Zarr
# ---------------
# To write NWB files to Zarr, replace the :py:class:`~pynwb.NWBHDF5IO` with
# :py:class:`hdmf_zarr.nwb.NWBZarrIO`.

from hdmf_zarr.nwb import NWBZarrIO
import os

path = "zarr_tutorial.nwb.zarr"
absolute_path = os.path.abspath(path)
with NWBZarrIO(path=path, mode="w") as io:
io.write(nwbfile)

#######################################################################################
# .. note::
# The main reason for using the ``absolute_path`` here is for testing purposes, to
# ensure that links and references work as expected. Otherwise, using the relative
# path is fine.
#
# Reading from Zarr
# -----------------
# To read NWB files from Zarr, replace the :py:class:`~pynwb.NWBHDF5IO` with the analogous
# :py:class:`hdmf_zarr.nwb.NWBZarrIO`.

with NWBZarrIO(path=absolute_path, mode="r") as io:
read_nwbfile = io.read()

#######################################################################################
# .. note::
# For more information, see the :hdmf-zarr:`hdmf-zarr documentation<>`.
2 changes: 1 addition & 1 deletion docs/gallery/advanced_io/streaming.py
@@ -169,6 +169,6 @@
# 1. supports caching, which will dramatically speed up repeated requests for the
# same region of data,
# 2. automatically retries when s3 fails to return, which helps avoid errors when accessing data due to
# intermittent errors in connections with S3 (remfile does this as well),
# intermittent errors in connections with S3 (remfile does this as well),
# 3. works also with other storage backends (e.g., GoogleDrive or Dropbox, not just S3) and file formats, and
# 4. in our experience appears to provide faster out-of-the-box performance than the ros3 driver.
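
###############################################################################
# A minimal sketch of the cached fsspec pattern summarized in point 1 above (the S3 URL
# below is a placeholder assumption; substitute the URL of the NWB file you want to
# stream):

import h5py
import fsspec
from fsspec.implementations.cached import CachingFileSystem

from pynwb import NWBHDF5IO

# cache remote reads on local disk so repeated requests for the same data are fast
fs = CachingFileSystem(fs=fsspec.filesystem("http"), cache_storage="nwb-cache")

s3_url = "https://dandiarchive.s3.amazonaws.com/example/path/to/file.nwb"  # placeholder
with fs.open(s3_url, "rb") as f:
    with h5py.File(f) as file:
        with NWBHDF5IO(file=file, load_namespaces=True) as io:
            nwbfile = io.read()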
28 changes: 5 additions & 23 deletions docs/gallery/general/add_remove_containers.py
@@ -70,31 +70,13 @@
# file path, and it is not possible to remove objects from an NWB file. You can use the
# :py:meth:`NWBHDF5IO.export <pynwb.NWBHDF5IO.export>` method, detailed below, to modify an NWB file in these ways.
#
# .. warning::
#
# NWB datasets that have been written to disk are read as :py:class:`h5py.Dataset <h5py.Dataset>` objects.
# Directly modifying the data in these :py:class:`h5py.Dataset <h5py.Dataset>` objects immediately
# modifies the data on disk
# (the :py:meth:`NWBHDF5IO.write <hdmf.backends.hdf5.h5tools.HDF5IO.write>` method does not need to be
# called and the :py:class:`~pynwb.NWBHDF5IO` instance does not need to be closed). Directly modifying datasets in
# this way can lead to files that do not validate or cannot be opened, so exercise caution when using this method.
# Note: only chunked datasets or datasets with ``maxshape`` set can be resized.
# See the `h5py chunked storage documentation <https://docs.h5py.org/en/stable/high/dataset.html#chunked-storage>`_
# for more details.

###############################################################################
# .. note::
#
# It is not possible to modify the attributes (fields) of an NWB container in memory.

###############################################################################
# Exporting a written NWB file to a new file path
# ---------------------------------------------------
# -----------------------------------------------
# Use the :py:meth:`NWBHDF5IO.export <pynwb.NWBHDF5IO.export>` method to read data from an existing NWB file,
# modify the data, and write the modified data to a new file path. Modifications to the data can be additions or
# removals of objects, such as :py:class:`~pynwb.base.TimeSeries` objects. This is especially useful if you
# have raw data and processed data in the same NWB file and you want to create a new NWB file with all of the
# contents of the original file except for the raw data for sharing with collaborators.
# have raw data and processed data in the same NWB file and you want to create a new NWB file with all the contents of
# the original file except for the raw data for sharing with collaborators.
#
# To remove existing containers, use the :py:meth:`~hdmf.utils.LabelledDict.pop` method on any
# :py:class:`~hdmf.utils.LabelledDict` object, such as ``NWBFile.acquisition``, ``NWBFile.processing``,
@@ -200,7 +182,7 @@
export_io.export(src_io=read_io, nwbfile=read_nwbfile)

###############################################################################
# More information about export
# ---------------------------------
# For more information about the export functionality, see :ref:`export`
# and the PyNWB documentation for :py:meth:`NWBHDF5IO.export <pynwb.NWBHDF5IO.export>`.
#
# For more information about editing a file in place, see :ref:`editing`.
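
###############################################################################
# A minimal sketch of the pop-and-export workflow described above (the file names and
# the ``"raw_timeseries"`` container name are illustrative assumptions):

from pynwb import NWBHDF5IO

with NWBHDF5IO("original.nwb", mode="r") as read_io:
    read_nwbfile = read_io.read()
    # drop a raw-data container so the exported file contains only processed data
    read_nwbfile.acquisition.pop("raw_timeseries")
    with NWBHDF5IO("shared_with_collaborators.nwb", mode="w") as export_io:
        export_io.export(src_io=read_io, nwbfile=read_nwbfile)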
3 changes: 3 additions & 0 deletions docs/source/conf.py
@@ -149,6 +149,8 @@ def __call__(self, filename):
'nwbwidgets': ("https://nwb-widgets.readthedocs.io/en/latest/", None),
'nwb-overview': ("https://nwb-overview.readthedocs.io/en/latest/", None),
'zarr': ("https://zarr.readthedocs.io/en/stable/", None),
'hdmf-zarr': ("https://hdmf-zarr.readthedocs.io/en/latest/", None),
'numcodecs': ("https://numcodecs.readthedocs.io/en/latest/", None),
}

extlinks = {
@@ -160,6 +162,7 @@ def __call__(self, filename):
'hdmf-docs': ('https://hdmf.readthedocs.io/en/stable/%s', '%s'),
'dandi': ('https://www.dandiarchive.org/%s', '%s'),
"nwbinspector": ("https://nwbinspector.readthedocs.io/en/dev/%s", "%s"),
'hdmf-zarr': ('https://hdmf-zarr.readthedocs.io/en/latest/%s', '%s'),
}

nitpicky = True
2 changes: 1 addition & 1 deletion docs/source/overview_citing.rst
@@ -35,7 +35,7 @@ If you use PyNWB in your research, please use the following citation:
Using RRID
----------

* ResourceID: `SCR_017452 <https://scicrunch.org/browse/resources/SCR_017452>`_
* ResourceID: `SCR_017452 <https://scicrunch.org/resolver/SCR_017452>`_
* Proper Citation: **(PyNWB, RRID:SCR_017452)**


1 change: 1 addition & 0 deletions requirements-doc.txt
@@ -12,3 +12,4 @@ dataframe_image  # used to render large dataframe as image in the sphinx gallery
lxml # used by dataframe_image when using the matplotlib backend
hdf5plugin
dandi>=0.46.6
hdmf-zarr
2 changes: 1 addition & 1 deletion requirements-min.txt
@@ -1,6 +1,6 @@
# minimum versions of package dependencies for installing PyNWB
h5py==2.10 # support for selection of datasets with list of indices added in 2.10
hdmf==3.9.0
hdmf==3.12.0
numpy==1.18
pandas==1.1.5
python-dateutil==2.7.3