diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index acbc3bd05..18dc00903 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -78,8 +78,10 @@ jobs: python -m coverage report -m - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: flags: integration files: coverage.xml fail_ci_if_error: true + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/run_dandi_read_tests.yml b/.github/workflows/run_dandi_read_tests.yml index 857b32c9a..7148d209e 100644 --- a/.github/workflows/run_dandi_read_tests.yml +++ b/.github/workflows/run_dandi_read_tests.yml @@ -1,15 +1,15 @@ name: Run DANDI read tests on: - schedule: - - cron: '0 6 * * *' # once per day at 1am ET + # NOTE this is disabled until we can run this systematically instead of randomly + # so we don't get constant error notifications and waste compute cycles + # See https://github.com/NeurodataWithoutBorders/pynwb/issues/1804 + # schedule: + # - cron: '0 6 * * *' # once per day at 1am ET workflow_dispatch: jobs: run-tests: runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} # necessary for conda steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -22,19 +22,14 @@ jobs: submodules: 'recursive' fetch-depth: 0 # tags are required for versioneer to determine the version - - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + - name: Set up Python + uses: actions/setup-python@v4 with: - auto-update-conda: true - activate-environment: ros3 - environment-file: environment-ros3.yml - python-version: "3.11" - channels: conda-forge - auto-activate-base: false + python-version: '3.11' - name: Install run dependencies run: | - python -m pip install dandi pytest + python -m pip install dandi fsspec requests aiohttp pytest python -m pip uninstall -y pynwb # uninstall pynwb python -m pip install -e . python -m pip list @@ -47,4 +42,4 @@ jobs: - name: Run DANDI read tests run: | - python tests/read_dandi/test_read_dandi.py + python tests/read_dandi/read_dandi.py diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 5befd21e7..a06d0280a 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,7 +8,7 @@ version: 2 build: os: ubuntu-20.04 tools: - python: '3.8' + python: '3.11' # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a3a79232..8129a82e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,25 @@ ## PyNWB 2.6.0 (Upcoming) ### Enhancements and minor changes +- For `NWBHDF5IO()`, change the default of arg `load_namespaces` from `False` to `True`. @bendichter [#1748](https://github.com/NeurodataWithoutBorders/pynwb/pull/1748) - Add `NWBHDF5IO.can_read()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) +- Fix usage of the `validate` function in the `pynwb.testing.testh5io` classes and cache the spec by default in those classes. @rly [#1782](https://github.com/NeurodataWithoutBorders/pynwb/pull/1782) +- Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793) and [#1809](https://github.com/NeurodataWithoutBorders/pynwb/pull/1809) +- Expose the offset, conversion and channel conversion parameters in `mock_ElectricalSeries`. 
@h-mayorquin [#1796](https://github.com/NeurodataWithoutBorders/pynwb/pull/1796) +- Expose `starting_time` in `mock_ElectricalSeries`. @h-mayorquin [#1805](https://github.com/NeurodataWithoutBorders/pynwb/pull/1805) +- Enhance `get_data_in_units()` to work with objects that have a `channel_conversion` attribute like the `ElectricalSeries`. @h-mayorquin [#1806](https://github.com/NeurodataWithoutBorders/pynwb/pull/1806) +- Refactor validation CLI tests to use `{sys.executable} -m coverage` to use the same Python version and run correctly on Debian systems. @yarikoptic [#1811](https://github.com/NeurodataWithoutBorders/pynwb/pull/1811) +- Fixed tests to address newly caught validation errors. @rly [#1839](https://github.com/NeurodataWithoutBorders/pynwb/pull/1839) + +### Bug fixes +- Fix bug where namespaces were loaded in "w-" mode. @h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) +- Fix bug where pynwb version was reported as "unknown" to readthedocs @stephprince [#1810](https://github.com/NeurodataWithoutBorders/pynwb/pull/1810) + +### Documentation and tutorial enhancements +- Add RemFile to streaming tutorial. @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761) +- Fix typos and improve clarify throughout tutorials. @zm711 [#1825](https://github.com/NeurodataWithoutBorders/pynwb/pull/1825) +- Add Zarr IO tutorial @bendichter [#1834](https://github.com/NeurodataWithoutBorders/pynwb/pull/1834) ## PyNWB 2.5.0 (August 18, 2023) diff --git a/docs/gallery/advanced_io/linking_data.py b/docs/gallery/advanced_io/linking_data.py index 082aa3c51..2f79d1488 100644 --- a/docs/gallery/advanced_io/linking_data.py +++ b/docs/gallery/advanced_io/linking_data.py @@ -6,57 +6,50 @@ PyNWB supports linking between files using external links. -""" +Example Use Case: Integrating data from multiple files +--------------------------------------------------------- -#################### -# Example Use Case: Integrating data from multiple files -# --------------------------------------------------------- -# -# NBWContainer classes (e.g., :py:class:`~pynwb.base.TimeSeries`) support the integration of data stored in external -# HDF5 files with NWB data files via external links. To make things more concrete, let's look at the following use -# case. We want to simultaneously record multiple data streams during data acquisition. Using the concept of external -# links allows us to save each data stream to an external HDF5 files during data acquisition and to -# afterwards link the data into a single NWB:N file. In this case, each recording becomes represented by a -# separate file-system object that can be set as read-only once the experiment is done. In the following -# we are using :py:meth:`~pynwb.base.TimeSeries` as an example, but the same approach works for other -# NWBContainers as well. -# +NBWContainer classes (e.g., :py:class:`~pynwb.base.TimeSeries`) support the integration of data stored in external +HDF5 files with NWB data files via external links. To make things more concrete, let's look at the following use +case. We want to simultaneously record multiple data streams during data acquisition. Using the concept of external +links allows us to save each data stream to an external HDF5 files during data acquisition and to +afterwards link the data into a single NWB file. In this case, each recording becomes represented by a +separate file-system object that can be set as read-only once the experiment is done. 
In the following +we are using :py:meth:`~pynwb.base.TimeSeries` as an example, but the same approach works for other +NWBContainers as well. -#################### -# .. tip:: -# -# The same strategies we use here for creating External Links also apply to Soft Links. -# The main difference between soft and external links is that soft links point to other -# objects within the same file while external links point to objects in external files. -# +.. tip:: -#################### -# .. tip:: -# -# In the case of :py:meth:`~pynwb.base.TimeSeries`, the uncorrected timestamps generated by the acquisition -# system can be stored (or linked) in the *sync* group. In the NWB:N format, hardware-recorded time data -# must then be corrected to a common time base (e.g., timestamps from all hardware sources aligned) before -# it can be included in the *timestamps* of the *TimeSeries*. This means, in the case -# of :py:meth:`~pynwb.base.TimeSeries` we need to be careful that we are not including data with incompatible -# timestamps in the same file when using external links. -# + The same strategies we use here for creating External Links also apply to Soft Links. + The main difference between soft and external links is that soft links point to other + objects within the same file while external links point to objects in external files. -#################### -# .. warning:: -# -# External links can become stale/break. Since external links are pointing to data in other files -# external links may become invalid any time files are modified on the file system, e.g., renamed, -# moved or access permissions are changed. -# + .. tip:: -#################### -# Creating test data -# --------------------------- -# -# In the following we are creating two :py:meth:`~pynwb.base.TimeSeries` each written to a separate file. -# We then show how we can integrate these files into a single NWBFile. + In the case of :py:meth:`~pynwb.base.TimeSeries`, the uncorrected timestamps generated by the acquisition + system can be stored (or linked) in the *sync* group. In the NWB format, hardware-recorded time data + must then be corrected to a common time base (e.g., timestamps from all hardware sources aligned) before + it can be included in the *timestamps* of the *TimeSeries*. This means, in the case + of :py:meth:`~pynwb.base.TimeSeries` we need to be careful that we are not including data with incompatible + timestamps in the same file when using external links. + + +.. warning:: + + External links can become stale/break. Since external links are pointing to data in other files + external links may become invalid any time files are modified on the file system, e.g., renamed, + moved or access permissions are changed. + + +Creating test data +--------------------------- + +In the following we are creating two :py:meth:`~pynwb.base.TimeSeries` each written to a separate file. +We then show how we can integrate these files into a single NWBFile. +""" # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnails_linking_data.png' + from datetime import datetime from uuid import uuid4 @@ -228,7 +221,7 @@ # Step 2: Add the container to another NWBFile # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # To integrate both :py:meth:`~pynwb.base.TimeSeries` into a single file we simply create a new -# :py:meth:`~pynwb.file.NWBFile` and our existing :py:meth:`~pynwb.base.TimeSeries` to it. PyNWB's +# :py:meth:`~pynwb.file.NWBFile` and add our existing :py:meth:`~pynwb.base.TimeSeries` to it. 
PyNWB's # :py:class:`~pynwb.NWBHDF5IO` backend then automatically detects that the TimeSeries have already # been written to another file and will create external links for us. # diff --git a/docs/gallery/advanced_io/plot_editing.py b/docs/gallery/advanced_io/plot_editing.py new file mode 100644 index 000000000..e45e3b887 --- /dev/null +++ b/docs/gallery/advanced_io/plot_editing.py @@ -0,0 +1,161 @@ +""" +.. _editing: + +Editing NWB files +================= + +This tutorial demonstrates how to edit NWB files in-place to make small changes to +existing containers. To add or remove containers from an NWB file, see +:ref:`modifying_data`. How and whether it is possible to edit an NWB file depends on the +storage backend and the type of edit. + +.. warning:: + + Manually editing an existing NWB file can make the file invalid if you are not + careful. We highly recommend making a copy before editing and running a validation + check on the file after editing it. See :ref:`validating`. + + +Editing datasets +---------------- +When reading an HDF5 NWB file, PyNWB exposes :py:class:`h5py.Dataset` objects, which can +be edited in place. For this to work, you must open the file in read/write mode +(``"r+"`` or ``"a"``). + +First, let's create an NWB file with data: +""" +from pynwb import NWBHDF5IO, NWBFile, TimeSeries +from datetime import datetime +from dateutil.tz import tzlocal +import numpy as np + +nwbfile = NWBFile( + session_description="my first synthetic recording", + identifier="EXAMPLE_ID", + session_start_time=datetime.now(tzlocal()), + session_id="LONELYMTN", +) + +nwbfile.add_acquisition( + TimeSeries( + name="synthetic_timeseries", + description="Random values", + data=np.random.randn(100, 100), + unit="m", + rate=10e3, + ) +) + +with NWBHDF5IO("test_edit.nwb", "w") as io: + io.write(nwbfile) + +############################################## +# Now, let's edit the values of the dataset + +with NWBHDF5IO("test_edit.nwb", "r+") as io: + nwbfile = io.read() + nwbfile.acquisition["synthetic_timeseries"].data[:10] = 0.0 + + +############################################## +# You can edit the attributes of that dataset through the ``attrs`` attribute: + +with NWBHDF5IO("test_edit.nwb", "r+") as io: + nwbfile = io.read() + nwbfile.acquisition["synthetic_timeseries"].data.attrs["unit"] = "volts" + +############################################## +# Changing the shape of dataset +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Whether it is possible to change the shape of a dataset depends on how the dataset was +# created. If the dataset was created with a flexible shape, then it is possible to +# change in-place. Creating a dataset with a flexible shape is done by specifying the +# ``maxshape`` argument of the :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` class +# constructor. Using a ``None`` value for a component of the ``maxshape`` tuple allows +# the size of the corresponding dimension to grow, such that is can be be reset arbitrarily long +# in that dimension. Chunking is required for datasets with flexible shapes. Setting ``maxshape``, +# hence, automatically sets chunking to ``True``, if not specified. 
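+#
+# As a quick check (a minimal sketch, not part of the original example), you can see
+# whether an already-written dataset is resizable by inspecting the ``maxshape`` and
+# ``chunks`` properties of the underlying :py:class:`h5py.Dataset`:
+
+with NWBHDF5IO("test_edit.nwb", "r") as io:
+    nwbfile = io.read()
+    dset = nwbfile.acquisition["synthetic_timeseries"].data
+    print(dset.maxshape)  # equals the current shape, so the shape is fixed
+    print(dset.chunks)    # None means contiguous storage, i.e., not chunked
+
+##############################################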
+# +# First, let's create an NWB file with a dataset with a flexible shape: + +from hdmf.backends.hdf5.h5_utils import H5DataIO + +nwbfile = NWBFile( + session_description="my first synthetic recording", + identifier="EXAMPLE_ID", + session_start_time=datetime.now(tzlocal()), + session_id="LONELYMTN", +) + +data_io = H5DataIO(data=np.random.randn(100, 100), maxshape=(None, 100)) + +nwbfile.add_acquisition( + TimeSeries( + name="synthetic_timeseries", + description="Random values", + data=data_io, + unit="m", + rate=10e3, + ) +) + +with NWBHDF5IO("test_edit2.nwb", "w") as io: + io.write(nwbfile) + +############################################## +# The ``None``value in the first component of ``maxshape`` means that the +# the first dimension of the dataset is unlimited. By setting the second dimension +# of ``maxshape`` to ``100``, that dimension is fixed to be no larger than ``100``. +# If you do not specify a``maxshape``, then the shape of the dataset will be fixed +# to the shape that the dataset was created with. Here, you can change the shape of +# the first dimension of this dataset. + + +with NWBHDF5IO("test_edit2.nwb", "r+") as io: + nwbfile = io.read() + nwbfile.acquisition["synthetic_timeseries"].data.resize((200, 100)) + +############################################## +# This will change the shape of the dataset in-place. If you try to change the shape of +# a dataset with a fixed shape, you will get an error. +# +# .. note:: +# There are several types of dataset edits that cannot be done in-place: changing the +# shape of a dataset with a fixed shape, or changing the datatype, compression, +# chunking, max-shape, or fill-value of a dataset. For any of these, we recommend using +# the :py:class:`pynwb.NWBHDF5IO.export` method to export the data to a new file. See +# :ref:`modifying_data` for more information. +# +# Editing groups +# -------------- +# Editing of groups is not yet supported in PyNWB. +# To edit the attributes of a group, open the file and edit it using :py:mod:`h5py`: + +import h5py + +with h5py.File("test_edit.nwb", "r+") as f: + f["acquisition"]["synthetic_timeseries"].attrs["description"] = "Random values in volts" + +############################################## +# .. warning:: +# Be careful not to edit values that will bring the file out of compliance with the +# NWB specification. +# +# Renaming groups and datasets +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Rename groups and datasets in-place using the :py:meth:`~h5py.Group.move` method. For example, to rename +# the ``"synthetic_timeseries"`` group: + +with h5py.File("test_edit.nwb", "r+") as f: + f["acquisition"].move("synthetic_timeseries", "synthetic_timeseries_renamed") + +############################################## +# You can use this same technique to move a group or dataset to a different location in +# the file. For example, to move the ``"synthetic_timeseries_renamed"`` group to the +# ``"analysis"`` group: + +with h5py.File("test_edit.nwb", "r+") as f: + f["acquisition"].move( + "synthetic_timeseries_renamed", + "/analysis/synthetic_timeseries_renamed", + ) diff --git a/docs/gallery/advanced_io/plot_iterative_write.py b/docs/gallery/advanced_io/plot_iterative_write.py index c461cddf8..958981a0b 100644 --- a/docs/gallery/advanced_io/plot_iterative_write.py +++ b/docs/gallery/advanced_io/plot_iterative_write.py @@ -17,7 +17,7 @@ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # In the typical write process, datasets are created and written as a whole. 
In contrast, -# iterative data write refers to the writing of the content of a dataset in an incremental, +# iterative data write refers to the writing of the contents of a dataset in an incremental, # iterative fashion. #################### @@ -32,10 +32,10 @@ # to avoid this problem by writing the data one-subblock-at-a-time, so that we only need to hold # a small subset of the array in memory at any given time. # * **Data streaming** In the context of streaming data we are faced with several issues: -# **1)** data is not available in memory but arrives in subblocks as the stream progresses +# **1)** data is not available in-memory but arrives in subblocks as the stream progresses # **2)** caching the data of a stream in-memory is often prohibitively expensive and volatile # **3)** the total size of the data is often unknown ahead of time. -# Iterative data write allows us to address issues 1) and 2) by enabling us to save data to +# Iterative data write allows us to address issues 1) and 2) by enabling us to save data to a # file incrementally as it arrives from the data stream. Issue 3) is addressed in the HDF5 # storage backend via support for chunking, enabling the creation of resizable arrays. # @@ -44,7 +44,7 @@ # data source. # # * **Sparse data arrays** In order to reduce storage size of sparse arrays a challenge is that while -# the data array (e.g., a matrix) may be large, only few values are set. To avoid storage overhead +# the data array (e.g., a matrix) may be large, only a few values are set. To avoid storage overhead # for storing the full array we can employ (in HDF5) a combination of chunking, compression, and # and iterative data write to significantly reduce storage cost for sparse data. # @@ -161,7 +161,7 @@ def write_test_file(filename, data, close_io=True): # # Here we use a simple data generator but PyNWB does not make any assumptions about what happens # inside the generator. Instead of creating data programmatically, you may hence, e.g., receive -# data from an acquisition system (or other source). We can, hence, use the same approach to write streaming data. +# data from an acquisition system (or other source). We can use the same approach to write streaming data. #################### # Step 1: Define the data generator @@ -208,7 +208,7 @@ def iter_sin(chunk_length=10, max_chunks=100): #################### # Discussion # ^^^^^^^^^^ -# Note, we here actually do not know how long our timeseries will be. +# Note, here we don't actually know how long our timeseries will be. print( "maxshape=%s, recommended_data_shape=%s, dtype=%s" @@ -218,7 +218,7 @@ def iter_sin(chunk_length=10, max_chunks=100): #################### # As we can see :py:class:`~hdmf.data_utils.DataChunkIterator` automatically recommends # in its ``maxshape`` that the first dimensions of our array should be unlimited (``None``) and the second -# dimension be ``10`` (i.e., the length of our chunk. Since :py:class:`~hdmf.data_utils.DataChunkIterator` +# dimension should be ``10`` (i.e., the length of our chunk. Since :py:class:`~hdmf.data_utils.DataChunkIterator` # has no way of knowing the minimum size of the array it automatically recommends the size of the first # chunk as the minimum size (i.e, ``(1, 10)``) and also infers the data type automatically from the first chunk. # To further customize this behavior we may also define the ``maxshape``, ``dtype``, and ``buffer_size`` when @@ -227,8 +227,8 @@ def iter_sin(chunk_length=10, max_chunks=100): # .. 
tip:: # # We here used :py:class:`~hdmf.data_utils.DataChunkIterator` to conveniently wrap our data stream. -# :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generators yields in **consecutive order** -# **single** complete element along the **first dimension** of our a array (i.e., iterate over the first +# :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generator yields in **consecutive order** +# a **single** complete element along the **first dimension** of our array (i.e., iterate over the first # axis and yield one-element-at-a-time). This behavior is useful in many practical cases. However, if # this strategy does not match our needs, then using :py:class:`~hdmf.data_utils.GenericDataChunkIterator` # or implementing your own derived :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` may be more @@ -266,7 +266,7 @@ def __next__(self): """ Return in each iteration a fully occupied data chunk of self.chunk_shape values at a random location within the matrix. Chunks are non-overlapping. REMEMBER: h5py does not support all - fancy indexing that numpy does so we need to make sure our selection can be + the fancy indexing that numpy does so we need to make sure our selection can be handled by the backend. """ if self.__chunks_created < self.num_chunks: @@ -289,7 +289,7 @@ def __next__(self): next = __next__ def recommended_chunk_shape(self): - # Here we can optionally recommend what a good chunking should be. + # Here we can optionally recommend what a good chunking could be. return self.chunk_shape def recommended_data_shape(self): @@ -379,7 +379,7 @@ def maxshape(self): # Now lets check out the size of our data file and compare it against the expected full size of our matrix import os -expected_size = xsize * ysize * 8 # This is the full size of our matrix in byte +expected_size = xsize * ysize * 8 # This is the full size of our matrix in bytes occupied_size = num_values * 8 # Number of non-zero values in out matrix file_size = os.stat( "basic_sparse_iterwrite_example.nwb" @@ -420,14 +420,14 @@ def maxshape(self): # A slight overhead (here 0.08MB) is expected because our file contains also the additional objects from # the NWBFile, plus some overhead for managing all the HDF5 metadata for all objects. # * **3) vs 2):** Adding compression does not yield any improvement here. This is expected, because, again we -# selected the chunking here in a way that we already allocated the minimum amount of storage to represent our data +# selected the chunking here in a way that we already allocated the minimum amount of storage to represent our data # and lossless compression of random data is not efficient. # * **4) vs 2):** When we increase our chunk size to ``(100,100)`` (i.e., ``100x`` larger than the chunks produced by -# our matrix generator) we observe an according roughly ``100x`` increase in file size. This is expected +# our matrix generator) we observe an accordingly roughly ``100x`` increase in file size. This is expected # since our chunks now do not align perfectly with the occupied data and each occupied chunk is allocated fully. # * **5) vs 4):** When using compression for the larger chunks we see a significant reduction # in file size (``1.14MB`` vs. ``80MB``). This is because the allocated chunks now contain in addition to the random -# values large areas of constant fillvalues, which compress easily. +# values large areas of constant fill values, which compress easily. 
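+#
+# For reference, a compressed, chunked write like the one in case 5) can be requested by
+# wrapping the iterator in :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`. The sketch
+# below is illustrative only; ``sparse_matrix_iterator`` is a placeholder name for the
+# iterator defined above.
+#
+# .. code-block:: python
+#
+#    from hdmf.backends.hdf5.h5_utils import H5DataIO
+#
+#    # request (100, 100) chunks and lossless gzip compression for the wrapped iterator
+#    wrapped_data = H5DataIO(data=sparse_matrix_iterator,
+#                            chunks=(100, 100),
+#                            compression='gzip')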
# # **Advantages:** # @@ -435,12 +435,12 @@ def maxshape(self): # * Only the data chunks in the HDF5 file that contain non-default values are ever being allocated # * The overall size of our file is reduced significantly # * Reduced I/O load -# * On read users can use the array as usual +# * On read, users can use the array as usual # # .. tip:: # -# With great power comes great responsibility **!** I/O and storage cost will depend among others on the chunk size, -# compression options, and the write pattern, i.e., the number and structure of the +# With great power comes great responsibility **!** I/O and storage cost will depend, among other factors, +# on the chunk size, compression options, and the write pattern, i.e., the number and structure of the # :py:class:`~hdmf.data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them # one value at a time would result in poor I/O performance in most practical cases, because of the large number of # chunks and large number of small I/O operations required. @@ -471,7 +471,7 @@ def maxshape(self): # # When converting large data files, a typical problem is that it is often too expensive to load all the data # into memory. This example is very similar to the data generator example only that instead of generating -# data on-the-fly in memory we are loading data from a file one-chunk-at-a-time in our generator. +# data on-the-fly in-memory we are loading data from a file one-chunk-at-a-time in our generator. # #################### @@ -568,7 +568,7 @@ def iter_largearray(filename, shape, dtype="float64"): # In practice, data from recording devices may be distributed across many files, e.g., one file per time range # or one file per recording channel. Using iterative data write provides an elegant solution to this problem # as it allows us to process large arrays one-subarray-at-a-time. To make things more interesting we'll show -# this for the case where each recording channel (i.e, the second dimension of our ``TimeSeries``) is broken up +# this for the case where each recording channel (i.e., the second dimension of our ``TimeSeries``) is broken up # across files. #################### diff --git a/docs/gallery/advanced_io/plot_zarr_io.py b/docs/gallery/advanced_io/plot_zarr_io.py new file mode 100644 index 000000000..b61fe4a03 --- /dev/null +++ b/docs/gallery/advanced_io/plot_zarr_io.py @@ -0,0 +1,98 @@ +""" +Zarr IO +======= + +Zarr is an alternative backend option for NWB files. It is a Python package that +provides an implementation of chunked, compressed, N-dimensional arrays. Zarr is a good +option for large datasets because, like HDF5, it is designed to store data on disk and +only load the data into memory when needed. Zarr is also a good option for parallel +computing because it supports concurrent reads and writes. + +Note that the Zarr native storage formats are optimized for storage in cloud storage +(e.g., S3). For very large files, Zarr will create many files which can lead to +issues for traditional file system (that are not cloud object stores) due to limitations +on the number of files per directory (this affects local disk, GDrive, Dropbox etc.). + +Zarr read and write is provided by the :hdmf-zarr:`hdmf-zarr<>` package. First, create an +an NWBFile using PyNWB. 
+""" + +# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_nwbzarrio.png' + + +from datetime import datetime +from dateutil.tz import tzlocal + +import numpy as np +from pynwb import NWBFile, TimeSeries + +# Create the NWBFile. Substitute your NWBFile generation here. +nwbfile = NWBFile( + session_description="my first synthetic recording", + identifier="EXAMPLE_ID", + session_start_time=datetime.now(tzlocal()), + session_id="LONELYMTN", +) + +####################################################################################### +# Dataset Configuration +# --------------------- +# Like HDF5, Zarr provides options to chunk and compress datasets. To leverage these +# features, replace all :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` with the analogous +# :py:class:`~hdmf_zarr.utils.ZarrDataIO`, which takes compressors specified by the +# :py:mod:`numcodecs` library. For example, here is an example :py:class:`.TimeSeries` +# where the ``data`` Dataset is compressed with a Blosc-zstd compressor: + +from numcodecs import Blosc +from hdmf_zarr import ZarrDataIO + +data_with_zarr_data_io = ZarrDataIO( + data=np.random.randn(100, 100), + chunks=(10, 10), + fillvalue=0, + compressor=Blosc(cname='zstd', clevel=3, shuffle=Blosc.SHUFFLE) +) + +####################################################################################### +# Now add it to the :py:class:`.NWBFile`. + +nwbfile.add_acquisition( + TimeSeries( + name="synthetic_timeseries", + data=data_with_zarr_data_io, + unit="m", + rate=10e3, + ) +) + +####################################################################################### +# Writing to Zarr +# --------------- +# To write NWB files to Zarr, replace the :py:class:`~pynwb.NWBHDF5IO` with +# :py:class:`hdmf_zarr.nwb.NWBZarrIO`. + +from hdmf_zarr.nwb import NWBZarrIO +import os + +path = "zarr_tutorial.nwb.zarr" +absolute_path = os.path.abspath(path) +with NWBZarrIO(path=path, mode="w") as io: + io.write(nwbfile) + +####################################################################################### +# .. note:: +# The main reason for using the ``absolute_path`` here is for testing purposes to +# ensure links and references work as expected. Otherwise, using the relative path +# here instead is fine. +# +# Reading from Zarr +# ----------------- +# To read NWB files from Zarr, replace the :py:class:`~pynwb.NWBHDF5IO` with the analogous +# :py:class:`hdmf_zarr.nwb.NWBZarrIO`. + +with NWBZarrIO(path=absolute_path, mode="r") as io: + read_nwbfile = io.read() + +####################################################################################### +# .. note:: +# For more information, see the :hdmf-zarr:`hdmf-zarr documentation<>`. diff --git a/docs/gallery/advanced_io/streaming.py b/docs/gallery/advanced_io/streaming.py index a1d86575a..4bdc992b8 100644 --- a/docs/gallery/advanced_io/streaming.py +++ b/docs/gallery/advanced_io/streaming.py @@ -74,7 +74,7 @@ # next, open the file with fs.open(s3_url, "rb") as f: with h5py.File(f) as file: - with pynwb.NWBHDF5IO(file=file, load_namespaces=True) as io: + with pynwb.NWBHDF5IO(file=file) as io: nwbfile = io.read() print(nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:]) @@ -95,6 +95,9 @@ # `fsspec documentation on known implementations `_ # for a full updated list of supported store formats. # +# One downside of this fsspec method is that fsspec is not optimized for reading HDF5 files, and so streaming data +# using this method can be slow. A faster alternative is ``remfile`` described below. 
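+#
+# If you want to compare streaming methods on your own network, a simple approach is to
+# time the initial read. The snippet below is an illustrative sketch (not part of the
+# original tutorial) and reuses the ``fs`` and ``s3_url`` objects defined above:
+#
+# .. code-block:: python
+#
+#    import time
+#
+#    start = time.perf_counter()
+#    with fs.open(s3_url, "rb") as f:
+#        with h5py.File(f) as file:
+#            with pynwb.NWBHDF5IO(file=file) as io:
+#                nwbfile = io.read()
+#    print(f"initial fsspec read took {time.perf_counter() - start:.1f} seconds")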
+# # Streaming Method 2: ROS3 # ------------------------ # ROS3 stands for "read only S3" and is a driver created by the HDF5 Group that allows HDF5 to read HDF5 files stored @@ -104,7 +107,7 @@ from pynwb import NWBHDF5IO -with NWBHDF5IO(s3_url, mode='r', load_namespaces=True, driver='ros3') as io: +with NWBHDF5IO(s3_url, mode='r', driver='ros3') as io: nwbfile = io.read() print(nwbfile) print(nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:]) @@ -125,19 +128,52 @@ # # pip uninstall h5py # conda install -c conda-forge "h5py>=3.2" +# +# Besides the extra burden of installing h5py from a non-PyPI source, one downside of this ROS3 method is that +# this method does not support automatic retries in case the connection fails. +################################################## +# Method 3: remfile +# ----------------- +# ``remfile`` is another library that enables indexing and streaming of files in s3. remfile is simple and fast, +# especially for the initial load of the nwb file and for accessing small pieces of data. The caveats of ``remfile`` +# are that it is a very new project that has not been tested in a variety of use-cases and caching options are +# limited compared to ``fsspec``. `remfile` is a simple, lightweight dependency with a very small codebase. +# +# You can install ``remfile`` with pip: +# +# .. code-block:: bash +# +# pip install remfile +# + +import h5py +from pynwb import NWBHDF5IO +import remfile + +rem_file = remfile.File(s3_url) + +with h5py.File(rem_file, "r") as h5py_file: + with NWBHDF5IO(file=h5py_file, load_namespaces=True) as io: + nwbfile = io.read() + print(nwbfile.acquisition["lick_times"].time_series["lick_left_times"].data[:]) + ################################################## # Which streaming method to choose? # --------------------------------- # # From a user perspective, once opened, the :py:class:`~pynwb.file.NWBFile` works the same with -# both fsspec and ros3. However, in general, we currently recommend using fsspec for streaming -# NWB files because it is more performant and reliable than ros3. In particular fsspec: +# fsspec, ros3, or remfile. However, in general, we currently recommend using fsspec for streaming +# NWB files because it is more performant and reliable than ros3 and more widely tested than remfile. +# However, if you are experiencing long wait times for the initial file load on your network, you +# may want to try remfile. +# +# Advantages of fsspec include: # # 1. supports caching, which will dramatically speed up repeated requests for the # same region of data, # 2. automatically retries when s3 fails to return, which helps avoid errors when accessing data due to -# intermittent errors in connections with S3, +# intermittent errors in connections with S3 (remfile does this as well), # 3. works also with other storage backends (e.g., GoogleDrive or Dropbox, not just S3) and file formats, and # 4. in our experience appears to provide faster out-of-the-box performance than the ros3 driver. diff --git a/docs/gallery/general/add_remove_containers.py b/docs/gallery/general/add_remove_containers.py index 26708f639..90ed8f324 100644 --- a/docs/gallery/general/add_remove_containers.py +++ b/docs/gallery/general/add_remove_containers.py @@ -70,31 +70,13 @@ # file path, and it is not possible to remove objects from an NWB file. You can use the # :py:meth:`NWBHDF5IO.export ` method, detailed below, to modify an NWB file in these ways. # -# .. 
warning:: -# -# NWB datasets that have been written to disk are read as :py:class:`h5py.Dataset ` objects. -# Directly modifying the data in these :py:class:`h5py.Dataset ` objects immediately -# modifies the data on disk -# (the :py:meth:`NWBHDF5IO.write ` method does not need to be called and the -# :py:class:`~pynwb.NWBHDF5IO` instance does not need to be closed). Directly modifying datasets in this way -# can lead to files that do not validate or cannot be opened, so take caution when using this method. -# Note: only chunked datasets or datasets with ``maxshape`` set can be resized. -# See the `h5py chunked storage documentation `_ -# for more details. - -############################################################################### -# .. note:: -# -# It is not possible to modify the attributes (fields) of an NWB container in memory. - -############################################################################### # Exporting a written NWB file to a new file path -# --------------------------------------------------- +# ----------------------------------------------- # Use the :py:meth:`NWBHDF5IO.export ` method to read data from an existing NWB file, # modify the data, and write the modified data to a new file path. Modifications to the data can be additions or # removals of objects, such as :py:class:`~pynwb.base.TimeSeries` objects. This is especially useful if you -# have raw data and processed data in the same NWB file and you want to create a new NWB file with all of the -# contents of the original file except for the raw data for sharing with collaborators. +# have raw data and processed data in the same NWB file and you want to create a new NWB file with all the contents of +# the original file except for the raw data for sharing with collaborators. # # To remove existing containers, use the :py:class:`~hdmf.utils.LabelledDict.pop` method on any # :py:class:`~hdmf.utils.LabelledDict` object, such as ``NWBFile.acquisition``, ``NWBFile.processing``, @@ -200,7 +182,7 @@ export_io.export(src_io=read_io, nwbfile=read_nwbfile) ############################################################################### -# More information about export -# --------------------------------- # For more information about the export functionality, see :ref:`export` # and the PyNWB documentation for :py:meth:`NWBHDF5IO.export `. +# +# For more information about editing a file in place, see :ref:`editing`. diff --git a/docs/gallery/general/extensions.py b/docs/gallery/general/extensions.py index fa4f4cbb7..4ec8f4749 100644 --- a/docs/gallery/general/extensions.py +++ b/docs/gallery/general/extensions.py @@ -100,7 +100,7 @@ # Using extensions # ----------------------------------------------------- # -# After an extension has been created, it can be used by downstream codes for reading and writing data. +# After an extension has been created, it can be used by downstream code for reading and writing data. # There are two main mechanisms for reading and writing extension data with PyNWB. # The first involves defining new :py:class:`~pynwb.core.NWBContainer` classes that are then mapped # to the neurodata types in the extension. @@ -167,7 +167,7 @@ def __init__(self, **kwargs): # By default, extensions are cached to file so that your NWB file will carry the extensions needed to read the file # with it. # -# To demonstrate this, first we will make some fake data using our extensions. +# To demonstrate this, first we will make some simulated data using our extensions. 
from datetime import datetime @@ -248,16 +248,11 @@ def __init__(self, **kwargs): # .. note:: # # For more information on writing NWB files, see :ref:`basic_writing`. - -#################### -# By default, PyNWB does not use the namespaces cached in a file--you must -# explicitly specify this. This behavior is enabled by the *load_namespaces* -# argument to the :py:class:`~pynwb.NWBHDF5IO` constructor. - -with NWBHDF5IO("cache_spec_example.nwb", mode="r", load_namespaces=True) as io: - nwbfile = io.read() - -#################### +# +# By default, if a namespace is not already loaded, PyNWB loads the namespace cached in +# the file. To disable this, set ``load_namespaces=False`` in the +# :py:class:`~pynwb.NWBHDF5IO` constructor. +# # .. _MultiContainerInterface: # # Creating and using a custom MultiContainerInterface @@ -375,17 +370,17 @@ class PotatoSack(MultiContainerInterface): nwb = io.read() print(nwb.get_processing_module()["potato_sack"].get_potato("big_potato").weight) # note: you can call get_processing_module() with or without the module name as -# an argument. however, if there is more than one module, the name is required. -# here, there is more than one potato, so the name of the potato is required as -# an argument to get get_potato +# an argument. However, if there is more than one module, the name is required. +# Here, there is more than one potato, so the name of the potato is required as +# an argument to get_potato #################### # Example: Cortical Surface Mesh # ----------------------------------------------------- # # Here we show how to create extensions by creating a data class for a -# cortical surface mesh. This data type is particularly important for ECoG data, we need to know where each electrode is -# with respect to the gyri and sulci. Surface mesh objects contain two types of data: +# cortical surface mesh. This data type is particularly important for ECoG data, since we need to know where +# each electrode is with respect to the gyri and sulci. Surface mesh objects contain two types of data: # # 1. 
`vertices`, which is an (n, 3) matrix of floats that represents points in 3D space # diff --git a/docs/gallery/general/object_id.py b/docs/gallery/general/object_id.py index 481cbb36a..206142715 100644 --- a/docs/gallery/general/object_id.py +++ b/docs/gallery/general/object_id.py @@ -32,7 +32,7 @@ session_start_time=start_time, ) -# make some fake data +# make some simulated data timestamps = np.linspace(0, 100, 1024) data = ( np.sin(0.333 * timestamps) diff --git a/docs/gallery/general/plot_read_basics.py b/docs/gallery/general/plot_read_basics.py index bba380092..c4a829d75 100644 --- a/docs/gallery/general/plot_read_basics.py +++ b/docs/gallery/general/plot_read_basics.py @@ -104,14 +104,14 @@ filepath = "sub-P11HMH_ses-20061101_ecephys+image.nwb" # Open the file in read mode "r", -io = NWBHDF5IO(filepath, mode="r", load_namespaces=True) +io = NWBHDF5IO(filepath, mode="r") nwbfile = io.read() nwbfile ####################################### # :py:class:`~pynwb.NWBHDF5IO` can also be used as a context manager: -with NWBHDF5IO(filepath, mode="r", load_namespaces=True) as io2: +with NWBHDF5IO(filepath, mode="r") as io2: nwbfile2 = io2.read() # data accessible here diff --git a/docs/source/conf.py b/docs/source/conf.py index 143d9d2c6..5725bd816 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -148,6 +148,8 @@ def __call__(self, filename): 'fsspec': ("https://filesystem-spec.readthedocs.io/en/latest/", None), 'nwbwidgets': ("https://nwb-widgets.readthedocs.io/en/latest/", None), 'nwb-overview': ("https://nwb-overview.readthedocs.io/en/latest/", None), + 'hdmf-zarr': ("https://hdmf-zarr.readthedocs.io/en/latest/", None), + 'numcodecs': ("https://numcodecs.readthedocs.io/en/latest/", None), } extlinks = { @@ -159,6 +161,7 @@ def __call__(self, filename): 'hdmf-docs': ('https://hdmf.readthedocs.io/en/stable/%s', '%s'), 'dandi': ('https://www.dandiarchive.org/%s', '%s'), "nwbinspector": ("https://nwbinspector.readthedocs.io/en/dev/%s", "%s"), + 'hdmf-zarr': ('https://hdmf-zarr.readthedocs.io/en/latest/%s', '%s'), } # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/export.rst b/docs/source/export.rst index 44a7a3a4b..490cd346e 100644 --- a/docs/source/export.rst +++ b/docs/source/export.rst @@ -109,7 +109,7 @@ How do I write a newly instantiated ``NWBFile`` to two different file paths? ----------------------------------------------------------------------------------------------------------------------- PyNWB does not support writing an :py:class:`~pynwb.file.NWBFile` that was not read from a file to two different files. For example, if you instantiate :py:class:`~pynwb.file.NWBFile` A and write it to file path 1, you cannot also write it -to file path 2. However, you can first write the :py:class:`~pynwb.file.NWBFile`` to file path 1, read the +to file path 2. However, you can first write the :py:class:`~pynwb.file.NWBFile` to file path 1, read the :py:class:`~pynwb.file.NWBFile` from file path 1, and then export it to file path 2. .. 
code-block:: python diff --git a/docs/source/figures/gallery_thumbnail_plot_nwbzarrio.png b/docs/source/figures/gallery_thumbnail_plot_nwbzarrio.png new file mode 100644 index 000000000..8926a47ff Binary files /dev/null and b/docs/source/figures/gallery_thumbnail_plot_nwbzarrio.png differ diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 6e33c2035..368ab7bd0 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -34,7 +34,7 @@ This will automatically install the following required dependencies: Install release from Conda-forge -------------------------------- -`Conda-forge `_ is a community led collection of recipes, build infrastructure +`Conda-forge `_ is a community led collection of recipes, build infrastructure and distributions for the `conda `_ package manager. To install or update PyNWB distribution from conda-forge using conda simply run: diff --git a/docs/source/overview_citing.rst b/docs/source/overview_citing.rst index bc72e017c..8fda20363 100644 --- a/docs/source/overview_citing.rst +++ b/docs/source/overview_citing.rst @@ -35,7 +35,7 @@ If you use PyNWB in your research, please use the following citation: Using RRID ---------- -* ResourceID: `SCR_017452 `_ +* ResourceID: `SCR_017452 `_ * Proper Citation: **(PyNWB, RRID:SCR_017452)** diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index f2ccb335d..07fd97246 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -17,7 +17,7 @@ tested on all supported operating systems and python distributions. That way, as a contributor, you know if you introduced regressions or coding style inconsistencies. -There are badges in the :pynwb:`README <#readme>` file which shows +There are badges in the :pynwb:`README ` file which shows the current condition of the dev branch. -------- @@ -25,7 +25,7 @@ Coverage -------- Code coverage is computed and reported using the coverage_ tool. There are two coverage-related -badges in the :pynwb:`README <#readme>` file. One shows the status of the :pynwb:`GitHub Action workflow ` which runs the coverage_ tool and uploads the report to +badges in the :pynwb:`README ` file. One shows the status of the :pynwb:`GitHub Action workflow ` which runs the coverage_ tool and uploads the report to codecov_, and the other badge shows the percentage coverage reported from codecov_. A detailed report can be found on codecov_, which shows line by line which lines are covered by the tests. diff --git a/docs/source/validation.rst b/docs/source/validation.rst index 8cc32a3f7..73c138127 100644 --- a/docs/source/validation.rst +++ b/docs/source/validation.rst @@ -11,7 +11,7 @@ The validator can be invoked like so: python -m pynwb.validate test.nwb If the file contains no NWB extensions, then this command will validate the file ``test.nwb`` against the -*core* NWB specification. On success, the output will is: +*core* NWB specification. On success, the output will be: .. 
code-block:: text diff --git a/environment-ros3.yml b/environment-ros3.yml index ae15e985c..c84b4c090 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -12,7 +12,10 @@ dependencies: - pandas==2.0.0 - python-dateutil==2.8.2 - setuptools - - dandi==0.55.1 # NOTE: dandi does not support osx-arm64 + - dandi==0.59.0 # NOTE: dandi does not support osx-arm64 - fsspec==2023.6.0 - requests==2.28.1 - aiohttp==3.8.3 + - pip + - pip: + - remfile==0.1.9 diff --git a/requirements-doc.txt b/requirements-doc.txt index 2050f4439..c37aee646 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -12,3 +12,4 @@ dataframe_image # used to render large dataframe as image in the sphinx galler lxml # used by dataframe_image when using the matplotlib backend hdf5plugin dandi>=0.46.6 +hdmf-zarr diff --git a/requirements-min.txt b/requirements-min.txt index 8f52348f1..816d53d43 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,6 +1,6 @@ # minimum versions of package dependencies for installing PyNWB h5py==2.10 # support for selection of datasets with list of indices added in 2.10 -hdmf==3.9.0 +hdmf==3.12.0 numpy==1.18 pandas==1.1.5 python-dateutil==2.7.3 diff --git a/requirements.txt b/requirements.txt index 2ad7b813e..d09ec7425 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # pinned dependencies to reproduce an entire development environment to use PyNWB -h5py==3.8.0 -hdmf==3.9.0 -numpy==1.24.2 -pandas==2.0.0 +h5py==3.10.0 +hdmf==3.12.0 +numpy==1.26.1 +pandas==2.1.2 python-dateutil==2.8.2 -setuptools==65.5.1 +setuptools==65.5.1 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index cccacf048..d44fcc2b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,6 +3,7 @@ VCS = git versionfile_source = src/pynwb/_version.py versionfile_build = pynwb/_version.py tag_prefix = '' +style = pep440-pre [flake8] max-line-length = 120 @@ -28,7 +29,7 @@ per-file-ignores = tests/integration/__init__.py:F401 src/pynwb/testing/__init__.py:F401 src/pynwb/validate.py:T201 - tests/read_dandi/test_read_dandi.py:T201 + tests/read_dandi/read_first_nwb_asset.py:T201 setup.py:T201 test.py:T201 scripts/*:T201 diff --git a/setup.py b/setup.py index 90aebf55f..0e48c269a 100755 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ reqs = [ 'h5py>=2.10', - 'hdmf>=3.9.0', + 'hdmf>=3.12.0', 'numpy>=1.16', 'pandas>=1.1.5', 'python-dateutil>=2.7.3', diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 710f55ee8..6e3b3104f 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -4,7 +4,6 @@ import os.path from pathlib import Path from copy import deepcopy -from warnings import warn import h5py from hdmf.spec import NamespaceCatalog @@ -244,8 +243,9 @@ def can_read(path: str): 'doc': 'the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x")', 'default': 'r'}, {'name': 'load_namespaces', 'type': bool, - 'doc': 'whether or not to load cached namespaces from given path - not applicable in write mode', - 'default': False}, + 'doc': ('whether or not to load cached namespaces from given path - not applicable in write mode ' + 'or when `manager` is not None or when `extensions` is not None'), + 'default': True}, {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps', @@ -261,15 +261,11 @@ def __init__(self, **kwargs): popargs('path', 'mode', 'manager', 'extensions', 
'load_namespaces', 'file', 'comm', 'driver', 'herd_path', kwargs) # Define the BuildManager to use - if load_namespaces: - if manager is not None: - warn("loading namespaces from file - ignoring 'manager'") - if extensions is not None: - warn("loading namespaces from file - ignoring 'extensions' argument") - # namespaces are not loaded when creating an NWBHDF5IO object in write mode - if 'w' in mode or mode == 'x': - raise ValueError("cannot load namespaces from file when writing to it") + io_modes_that_create_file = ['w', 'w-', 'x'] + if mode in io_modes_that_create_file or manager is not None or extensions is not None: + load_namespaces = False + if load_namespaces: tm = get_type_map() super().load_namespaces(tm, path, file=file_obj, driver=driver) manager = BuildManager(tm) diff --git a/src/pynwb/_version.py b/src/pynwb/_version.py index 57dfeb9fc..bf16355e1 100644 --- a/src/pynwb/_version.py +++ b/src/pynwb/_version.py @@ -44,7 +44,7 @@ def get_config(): cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440-pre" - cfg.tag_prefix = "*.*.*" + cfg.tag_prefix = "" cfg.parentdir_prefix = "None" cfg.versionfile_source = "src/pynwb/_version.py" cfg.verbose = False diff --git a/src/pynwb/base.py b/src/pynwb/base.py index bec8903d5..42f7b7ff3 100644 --- a/src/pynwb/base.py +++ b/src/pynwb/base.py @@ -174,15 +174,25 @@ def __init__(self, **kwargs): timestamps = args_to_process['timestamps'] if timestamps is not None: if self.rate is not None: - raise ValueError('Specifying rate and timestamps is not supported.') + self._error_on_new_warn_on_construct( + error_msg='Specifying rate and timestamps is not supported.' + ) if self.starting_time is not None: - raise ValueError('Specifying starting_time and timestamps is not supported.') + self._error_on_new_warn_on_construct( + error_msg='Specifying starting_time and timestamps is not supported.' + ) self.fields['timestamps'] = timestamps self.timestamps_unit = self.__time_unit self.interval = 1 if isinstance(timestamps, TimeSeries): timestamps.__add_link('timestamp_link', self) elif self.rate is not None: + if self.rate < 0: + self._error_on_new_warn_on_construct( + error_msg='Rate must not be a negative value.' 
+ ) + elif self.rate == 0.0 and get_data_shape(data)[0] > 1: + warn('Timeseries has a rate of 0.0 Hz, but the length of the data is greater than 1.') if self.starting_time is None: # override default if rate is provided but not starting time self.starting_time = 0.0 self.starting_time_unit = self.__time_unit @@ -288,7 +298,11 @@ def get_timestamps(self): return np.arange(len(self.data)) / self.rate + self.starting_time def get_data_in_units(self): - return np.asarray(self.data) * self.conversion + self.offset + if "channel_conversion" in self.fields: + scale_factor = self.conversion * self.channel_conversion[:, np.newaxis] + else: + scale_factor = self.conversion + return np.asarray(self.data) * scale_factor + self.offset @register_class('Image', CORE_NAMESPACE) diff --git a/src/pynwb/testing/mock/ecephys.py b/src/pynwb/testing/mock/ecephys.py index 888f19962..54edf7680 100644 --- a/src/pynwb/testing/mock/ecephys.py +++ b/src/pynwb/testing/mock/ecephys.py @@ -71,18 +71,26 @@ def mock_ElectricalSeries( data=None, rate: float = 30000.0, timestamps=None, + starting_time: Optional[float] = None, electrodes: Optional[DynamicTableRegion] = None, filtering: str = "filtering", - nwbfile: Optional[NWBFile] = None + nwbfile: Optional[NWBFile] = None, + channel_conversion: Optional[np.ndarray] = None, + conversion: float = 1.0, + offset: float = 0., ) -> ElectricalSeries: electrical_series = ElectricalSeries( name=name or name_generator("ElectricalSeries"), description=description, data=data if data is not None else np.ones((10, 5)), rate=rate, + starting_time=starting_time, timestamps=timestamps, electrodes=electrodes or mock_electrodes(nwbfile=nwbfile), filtering=filtering, + conversion=conversion, + offset=offset, + channel_conversion=channel_conversion, ) if nwbfile is not None: diff --git a/src/pynwb/testing/mock/ophys.py b/src/pynwb/testing/mock/ophys.py index d9ba02572..cd99d5957 100644 --- a/src/pynwb/testing/mock/ophys.py +++ b/src/pynwb/testing/mock/ophys.py @@ -132,6 +132,7 @@ def mock_OnePhotonSeries( conversion=conversion, timestamps=timestamps, starting_time=starting_time, + offset=offset, rate=rate, comments=comments, description=description, @@ -162,6 +163,7 @@ def mock_TwoPhotonSeries( dimension=None, resolution=-1.0, conversion=1.0, + offset=0.0, timestamps=None, starting_time=None, comments="no comments", @@ -194,6 +196,7 @@ def mock_TwoPhotonSeries( control=control, control_description=control_description, device=device, + offset=offset, ) if nwbfile is not None: diff --git a/src/pynwb/testing/testh5io.py b/src/pynwb/testing/testh5io.py index b45407bfb..c7b3bfdcc 100644 --- a/src/pynwb/testing/testh5io.py +++ b/src/pynwb/testing/testh5io.py @@ -79,7 +79,7 @@ def test_roundtrip_export(self): self.assertIs(self.read_exported_nwbfile.objects[self.container.object_id], self.read_container) self.assertContainerEqual(self.read_container, self.container, ignore_hdmf_attrs=True) - def roundtripContainer(self, cache_spec=False): + def roundtripContainer(self, cache_spec=True): """Add the Container to an NWBFile, write it to file, read the file, and return the Container from the file. 
""" session_description = 'a file to test writing and reading a %s' % self.container_type @@ -116,7 +116,7 @@ def roundtripContainer(self, cache_spec=False): self.reader = None raise e - def roundtripExportContainer(self, cache_spec=False): + def roundtripExportContainer(self, cache_spec=True): """ Add the test Container to an NWBFile, write it to file, read the file, export the read NWBFile to another file, and return the test Container from the file @@ -163,18 +163,14 @@ def getContainer(self, nwbfile): def validate(self): """ Validate the created files """ if os.path.exists(self.filename): - with NWBHDF5IO(self.filename, mode='r') as io: - errors = pynwb_validate(io) - if errors: - for err in errors: - raise Exception(err) + errors, _ = pynwb_validate(paths=[self.filename]) + if errors: + raise Exception("\n".join(errors)) if os.path.exists(self.export_filename): - with NWBHDF5IO(self.filename, mode='r') as io: - errors = pynwb_validate(io) - if errors: - for err in errors: - raise Exception(err) + errors, _ = pynwb_validate(paths=[self.export_filename]) + if errors: + raise Exception("\n".join(errors)) class AcquisitionH5IOMixin(NWBH5IOMixin): @@ -294,7 +290,7 @@ def test_roundtrip_export(self): self.assertIs(self.read_exported_nwbfile.objects[self.container.object_id], self.read_container) self.assertContainerEqual(self.read_container, self.container, ignore_hdmf_attrs=True) - def roundtripContainer(self, cache_spec=False): + def roundtripContainer(self, cache_spec=True): """Write the file, validate the file, read the file, and return the Container from the file. """ @@ -325,7 +321,7 @@ def roundtripContainer(self, cache_spec=False): self.reader = None raise e - def roundtripExportContainer(self, cache_spec=False): + def roundtripExportContainer(self, cache_spec=True): """ Roundtrip the container, then export the read NWBFile to a new file, validate the files, and return the test Container from the file. @@ -366,13 +362,11 @@ def roundtripExportContainer(self, cache_spec=False): def validate(self): """Validate the created files.""" if os.path.exists(self.filename): - with NWBHDF5IO(self.filename, mode='r') as io: - errors = pynwb_validate(io) - if errors: - raise Exception("\n".join(errors)) + errors, _ = pynwb_validate(paths=[self.filename]) + if errors: + raise Exception("\n".join(errors)) if os.path.exists(self.export_filename): - with NWBHDF5IO(self.filename, mode='r') as io: - errors = pynwb_validate(io) - if errors: - raise Exception("\n".join(errors)) + errors, _ = pynwb_validate(paths=[self.export_filename]) + if errors: + raise Exception("\n".join(errors)) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 23b3aee6f..827249cbb 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -120,7 +120,11 @@ def _get_cached_namespaces_to_validate( is_method=False, ) def validate(**kwargs): - """Validate NWB file(s) against a namespace or its cached namespaces.""" + """Validate NWB file(s) against a namespace or its cached namespaces. + + NOTE: If an io object is provided and no namespace name is specified, then the file will be validated + against the core namespace, even if use_cached_namespaces is True. + """ from . 
import NWBHDF5IO # TODO: modularize to avoid circular import io, paths, use_cached_namespaces, namespace, verbose, driver = getargs( @@ -156,6 +160,7 @@ def validate(**kwargs): file=sys.stderr, ) else: + io_kwargs.update(load_namespaces=False) namespaces_to_validate = [CORE_NAMESPACE] if namespace is not None: diff --git a/tests/back_compat/test_import_structure.py b/tests/back_compat/test_import_structure.py index e5f931f5d..79d4f6ad0 100644 --- a/tests/back_compat/test_import_structure.py +++ b/tests/back_compat/test_import_structure.py @@ -82,7 +82,6 @@ def test_outer_import_structure(self): "spec", "testing", "validate", - "warn", ] for member in expected_structure: self.assertIn(member=member, container=current_structure) diff --git a/tests/back_compat/test_read.py b/tests/back_compat/test_read.py index 919ae6bde..16a119690 100644 --- a/tests/back_compat/test_read.py +++ b/tests/back_compat/test_read.py @@ -29,8 +29,24 @@ class TestReadOldVersions(TestCase): "- expected an array of shape '[None]', got non-array data 'one publication'")], '1.0.3_str_pub.nwb': [("root/general/related_publications (general/related_publications): incorrect shape " "- expected an array of shape '[None]', got non-array data 'one publication'")], + '1.5.1_timeseries_no_data.nwb': [("TimeSeries/data/data (acquisition/test_timeseries/data): argument missing")], + '1.5.1_timeseries_no_unit.nwb': [("TimeSeries/data/unit (acquisition/test_timeseries/data): argument missing")], + '1.5.1_imageseries_no_data.nwb': [("ImageSeries/data/data (acquisition/test_imageseries/data): " + "argument missing")], + '1.5.1_imageseries_no_unit.nwb': [("ImageSeries/data/unit (acquisition/test_imageseries/data): " + "argument missing")], } + def get_io(self, path): + """Get an NWBHDF5IO object for the given path.""" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + return NWBHDF5IO(str(path), 'r') + def test_read(self): """Test reading and validating all NWB files in the same folder as this file. 
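The validation-related changes above (the docstring note in validate(), the forced load_namespaces=False for io-based validation, and the get_io() helpers that silence the "Ignoring cached namespace" warning) boil down to two call styles. A minimal sketch follows; it is illustrative only and not part of the patch, reusing a back_compat fixture path that appears elsewhere in this diff and assuming pynwb with these changes is installed.

```python
from pynwb import NWBHDF5IO, validate

# Path-based validation checks the file against its cached namespaces and, as in
# the updated testh5io.py helpers, returns a two-element tuple (error list, status).
errors, _ = validate(paths=["tests/back_compat/2.1.0_nwbfile_with_extension.nwb"])

# io-based validation returns only the error list. Per the docstring note above,
# omitting the namespace validates against the core namespace; pass a namespace
# name explicitly to validate against a cached extension namespace instead.
with NWBHDF5IO("tests/back_compat/2.1.0_nwbfile_with_extension.nwb", mode="r") as io:
    core_errors = validate(io)
    extension_errors = validate(io, "ndx-testextension")
```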
@@ -43,7 +59,7 @@ def test_read(self): with self.subTest(file=f.name): with warnings.catch_warnings(record=True) as warnings_on_read: warnings.simplefilter("always") - with NWBHDF5IO(str(f), 'r', load_namespaces=True) as io: + with self.get_io(f) as io: errors = validate(io) io.read() for w in warnings_on_read: @@ -69,28 +85,28 @@ def test_read(self): def test_read_timeseries_no_data(self): """Test that a TimeSeries written without data is read with data set to the default value.""" f = Path(__file__).parent / '1.5.1_timeseries_no_data.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_timeseries'].data, TimeSeries.DEFAULT_DATA) def test_read_timeseries_no_unit(self): """Test that an ImageSeries written without unit is read with unit set to the default value.""" f = Path(__file__).parent / '1.5.1_timeseries_no_unit.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_timeseries'].unit, TimeSeries.DEFAULT_UNIT) def test_read_imageseries_no_data(self): """Test that an ImageSeries written without data is read with data set to the default value.""" f = Path(__file__).parent / '1.5.1_imageseries_no_data.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_imageseries'].data, ImageSeries.DEFAULT_DATA) def test_read_imageseries_no_unit(self): """Test that an ImageSeries written without unit is read with unit set to the default value.""" f = Path(__file__).parent / '1.5.1_imageseries_no_unit.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_imageseries'].unit, ImageSeries.DEFAULT_UNIT) @@ -100,7 +116,7 @@ def test_read_imageseries_non_external_format(self): f = Path(__file__).parent / fbase expected_warning = self.expected_warnings[fbase][0] with self.assertWarnsWith(UserWarning, expected_warning): - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_imageseries'].format, "tiff") @@ -110,13 +126,13 @@ def test_read_imageseries_nonmatch_starting_frame(self): f = Path(__file__).parent / fbase expected_warning = self.expected_warnings[fbase][0] with self.assertWarnsWith(UserWarning, expected_warning): - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_imageseries'].starting_frame, [1, 2, 3]) def test_read_subject_no_age__reference(self): """Test that reading a Subject without an age__reference set with NWB schema 2.5.0 sets the value to None""" f = Path(__file__).parent / '2.2.0_subject_no_age__reference.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertIsNone(read_nwbfile.subject.age__reference) diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index 0fd790073..d68334c89 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -3,6 +3,7 @@ import numpy as np from h5py import File from pathlib import Path +import tempfile from pynwb import NWBFile, TimeSeries, get_manager, NWBHDF5IO, validate @@ -14,6 +15,7 @@ from pynwb.spec import NWBGroupSpec, NWBDatasetSpec, NWBNamespace from pynwb.ecephys import 
ElectricalSeries, LFP from pynwb.testing import remove_test_file, TestCase +from pynwb.testing.mock.file import mock_NWBFile class TestHDF5Writer(TestCase): @@ -122,6 +124,19 @@ def test_write_no_cache_spec(self): with File(self.path, 'r') as f: self.assertNotIn('specifications', f) + def test_file_creation_io_modes(self): + io_modes_that_create_file = ["w", "w-", "x"] + + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + for io_mode in io_modes_that_create_file: + file_path = temp_dir / f"test_io_mode={io_mode}.nwb" + + # Test file creation + nwbfile = mock_NWBFile() + with NWBHDF5IO(str(file_path), io_mode) as io: + io.write(nwbfile) + class TestHDF5WriterWithInjectedFile(TestCase): diff --git a/tests/integration/hdf5/test_misc.py b/tests/integration/hdf5/test_misc.py index 6afd7971e..cd9ab1706 100644 --- a/tests/integration/hdf5/test_misc.py +++ b/tests/integration/hdf5/test_misc.py @@ -109,20 +109,36 @@ class TestDecompositionSeriesIO(NWBH5IOMixin, TestCase): def setUpContainer(self): """ Return the test DecompositionSeries to read/write """ - self.timeseries = TimeSeries(name='dummy timeseries', description='desc', - data=np.ones((3, 3)), unit='flibs', - timestamps=np.ones((3,))) - bands = DynamicTable(name='bands', description='band info for LFPSpectralAnalysis', columns=[ - VectorData(name='band_name', description='name of bands', data=['alpha', 'beta', 'gamma']), - VectorData(name='band_limits', description='low and high cutoffs in Hz', data=np.ones((3, 2))) - ]) - spec_anal = DecompositionSeries(name='LFPSpectralAnalysis', - description='my description', - data=np.ones((3, 3, 3)), - timestamps=np.ones((3,)), - source_timeseries=self.timeseries, - metric='amplitude', - bands=bands) + self.timeseries = TimeSeries( + name='dummy timeseries', + description='desc', + data=np.ones((3, 3)), + unit='flibs', + timestamps=np.ones((3,)), + ) + bands = DynamicTable( + name='bands', + description='band info for LFPSpectralAnalysis', + columns=[ + VectorData(name='band_name', description='name of bands', data=['alpha', 'beta', 'gamma']), + VectorData(name='band_limits', description='low and high cutoffs in Hz', data=np.ones((3, 2))), + VectorData(name='band_mean', description='mean gaussian filters in Hz', data=np.ones((3,))), + VectorData( + name='band_stdev', + description='standard deviation of gaussian filters in Hz', + data=np.ones((3,)) + ), + ], + ) + spec_anal = DecompositionSeries( + name='LFPSpectralAnalysis', + description='my description', + data=np.ones((3, 3, 3)), + timestamps=np.ones((3,)), + source_timeseries=self.timeseries, + metric='amplitude', + bands=bands, + ) return spec_anal @@ -144,27 +160,48 @@ def make_electrode_table(self): """ Make an electrode table, electrode group, and device """ self.table = get_electrode_table() self.dev1 = Device(name='dev1') - self.group = ElectrodeGroup(name='tetrode1', - description='tetrode description', - location='tetrode location', - device=self.dev1) - for i in range(4): + self.group = ElectrodeGroup( + name='tetrode1', + description='tetrode description', + location='tetrode location', + device=self.dev1 + ) + for _ in range(4): self.table.add_row(location='CA1', group=self.group, group_name='tetrode1') def setUpContainer(self): """ Return the test ElectricalSeries to read/write """ self.make_electrode_table(self) - region = DynamicTableRegion(name='source_channels', - data=[0, 2], - description='the first and third electrodes', - table=self.table) + region = DynamicTableRegion( + name='source_channels', + 
data=[0, 2], + description='the first and third electrodes', + table=self.table + ) data = np.random.randn(100, 2, 30) timestamps = np.arange(100)/100 - ds = DecompositionSeries(name='test_DS', - data=data, - source_channels=region, - timestamps=timestamps, - metric='amplitude') + bands = DynamicTable( + name='bands', + description='band info for LFPSpectralAnalysis', + columns=[ + VectorData(name='band_name', description='name of bands', data=['alpha', 'beta', 'gamma']), + VectorData(name='band_limits', description='low and high cutoffs in Hz', data=np.ones((3, 2))), + VectorData(name='band_mean', description='mean gaussian filters in Hz', data=np.ones((3,))), + VectorData( + name='band_stdev', + description='standard deviation of gaussian filters in Hz', + data=np.ones((3,)) + ), + ], + ) + ds = DecompositionSeries( + name='test_DS', + data=data, + source_channels=region, + timestamps=timestamps, + metric='amplitude', + bands=bands, + ) return ds def addContainer(self, nwbfile): diff --git a/tests/integration/ros3/test_ros3.py b/tests/integration/ros3/test_ros3.py index c2f7b562d..95a891760 100644 --- a/tests/integration/ros3/test_ros3.py +++ b/tests/integration/ros3/test_ros3.py @@ -4,6 +4,7 @@ from pynwb.testing import TestCase import urllib.request import h5py +import warnings class TestRos3Streaming(TestCase): @@ -28,16 +29,28 @@ def setUp(self): def test_read(self): s3_path = 'https://dandiarchive.s3.amazonaws.com/ros3test.nwb' - with NWBHDF5IO(s3_path, mode='r', driver='ros3') as io: - nwbfile = io.read() - test_data = nwbfile.acquisition['ts_name'].data[:] - self.assertEqual(len(test_data), 3) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + with NWBHDF5IO(s3_path, mode='r', driver='ros3') as io: + nwbfile = io.read() + test_data = nwbfile.acquisition['ts_name'].data[:] + self.assertEqual(len(test_data), 3) def test_dandi_read(self): - with NWBHDF5IO(path=self.s3_test_path, mode='r', driver='ros3') as io: - nwbfile = io.read() - test_data = nwbfile.acquisition['TestData'].data[:] - self.assertEqual(len(test_data), 3) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + with NWBHDF5IO(path=self.s3_test_path, mode='r', driver='ros3') as io: + nwbfile = io.read() + test_data = nwbfile.acquisition['TestData'].data[:] + self.assertEqual(len(test_data), 3) def test_dandi_get_cached_namespaces(self): expected_namespaces = ["core"] diff --git a/tests/read_dandi/test_read_dandi.py b/tests/read_dandi/read_first_nwb_asset.py similarity index 79% rename from tests/read_dandi/test_read_dandi.py rename to tests/read_dandi/read_first_nwb_asset.py index 0e0698d77..895dbb1c2 100644 --- a/tests/read_dandi/test_read_dandi.py +++ b/tests/read_dandi/read_first_nwb_asset.py @@ -1,5 +1,7 @@ -"""Test reading NWB files from the DANDI Archive using ROS3.""" +"""Test reading NWB files from the DANDI Archive using fsspec.""" from dandi.dandiapi import DandiAPIClient +import fsspec +import h5py import random import sys import traceback @@ -10,9 +12,12 @@ # NOTE: do not name the function with "test_" prefix, otherwise pytest # will try to run it as a test +# TODO read dandisets systematically, not randomly +# see https://github.com/NeurodataWithoutBorders/pynwb/issues/1804 + def read_first_nwb_asset(): - """Test reading the first NWB asset from a random selection of 50 dandisets that uses NWB.""" - num_dandisets_to_read = 
50 + """Test reading the first NWB asset from a random selection of 2 dandisets that use NWB.""" + num_dandisets_to_read = 2 client = DandiAPIClient() dandisets = list(client.get_dandisets()) random.shuffle(dandisets) @@ -20,6 +25,8 @@ def read_first_nwb_asset(): print("Reading NWB files from the following dandisets:") print([d.get_raw_metadata()["identifier"] for d in dandisets_to_read]) + fs = fsspec.filesystem("http") + failed_reads = dict() for i, dandiset in enumerate(dandisets_to_read): dandiset_metadata = dandiset.get_raw_metadata() @@ -47,8 +54,10 @@ def read_first_nwb_asset(): s3_url = first_asset.get_content_url(follow_redirects=1, strip_query=True) try: - with NWBHDF5IO(path=s3_url, load_namespaces=True, driver="ros3") as io: - io.read() + with fs.open(s3_url, "rb") as f: + with h5py.File(f) as file: + with NWBHDF5IO(file=file) as io: + io.read() except Exception as e: print(traceback.format_exc()) failed_reads[dandiset] = e diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py index 805f946ec..ad4ce6739 100644 --- a/tests/unit/test_base.py +++ b/tests/unit/test_base.py @@ -405,6 +405,65 @@ def test_get_data_in_units(self): ts = mock_TimeSeries(data=[1., 2., 3.]) assert_array_equal(ts.get_data_in_units(), [1., 2., 3.]) + def test_non_positive_rate(self): + with self.assertRaisesWith(ValueError, 'Rate must not be a negative value.'): + TimeSeries(name='test_ts', data=list(), unit='volts', rate=-1.0) + + with self.assertWarnsWith(UserWarning, + 'Timeseries has a rate of 0.0 Hz, but the length of the data is greater than 1.'): + TimeSeries(name='test_ts1', data=[1, 2, 3], unit='volts', rate=0.0) + + def test_file_with_non_positive_rate_in_construct_mode(self): + """Test that UserWarning is raised when rate is 0 or negative + while being in construct mode (i.e., on data read).""" + obj = TimeSeries.__new__(TimeSeries, + container_source=None, + parent=None, + object_id="test", + in_construct_mode=True) + with self.assertWarnsWith(warn_type=UserWarning, exc_msg='Rate must not be a negative value.'): + obj.__init__( + name="test_ts", + data=list(), + unit="volts", + rate=-1.0 + ) + + def test_file_with_rate_and_timestamps_in_construct_mode(self): + """Test that UserWarning is raised when rate and timestamps are both specified + while being in construct mode (i.e., on data read).""" + obj = TimeSeries.__new__(TimeSeries, + container_source=None, + parent=None, + object_id="test", + in_construct_mode=True) + with self.assertWarnsWith(warn_type=UserWarning, exc_msg='Specifying rate and timestamps is not supported.'): + obj.__init__( + name="test_ts", + data=[11, 12, 13, 14, 15], + unit="volts", + rate=1.0, + timestamps=[1, 2, 3, 4, 5] + ) + + def test_file_with_starting_time_and_timestamps_in_construct_mode(self): + """Test that UserWarning is raised when starting_time and timestamps are both specified + while being in construct mode (i.e.,
on data read).""" + obj = TimeSeries.__new__(TimeSeries, + container_source=None, + parent=None, + object_id="test", + in_construct_mode=True) + with self.assertWarnsWith(warn_type=UserWarning, + exc_msg='Specifying starting_time and timestamps is not supported.'): + obj.__init__( + name="test_ts", + data=[11, 12, 13, 14, 15], + unit="volts", + starting_time=1.0, + timestamps=[1, 2, 3, 4, 5] + ) + class TestImage(TestCase): def test_init(self): diff --git a/tests/unit/test_ecephys.py b/tests/unit/test_ecephys.py index 6f76a5e8c..f81b61f84 100644 --- a/tests/unit/test_ecephys.py +++ b/tests/unit/test_ecephys.py @@ -18,6 +18,7 @@ from pynwb.device import Device from pynwb.file import ElectrodeTable from pynwb.testing import TestCase +from pynwb.testing.mock.ecephys import mock_ElectricalSeries from hdmf.common import DynamicTableRegion @@ -115,6 +116,24 @@ def test_dimensions_warning(self): "but instead the first does. Data is oriented incorrectly and should be transposed." ) in str(w[-1].message) + def test_get_data_in_units(self): + + data = np.asarray([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]) + conversion = 1.0 + offset = 3.0 + channel_conversion = np.asarray([2.0, 2.0]) + electrical_series = mock_ElectricalSeries( + data=data, + conversion=conversion, + offset=offset, + channel_conversion=channel_conversion, + ) + + data_in_units = electrical_series.get_data_in_units() + expected_data = data * conversion * channel_conversion[:, np.newaxis] + offset + + np.testing.assert_almost_equal(data_in_units, expected_data) + class SpikeEventSeriesConstructor(TestCase): diff --git a/tests/unit/test_file.py b/tests/unit/test_file.py index c9bd98ad0..756009ff3 100644 --- a/tests/unit/test_file.py +++ b/tests/unit/test_file.py @@ -527,6 +527,7 @@ def test_subject_age_duration(self): class TestCacheSpec(TestCase): + """Test whether the file can be written and read when caching the spec.""" def setUp(self): self.path = 'unittest_cached_spec.nwb' @@ -535,18 +536,20 @@ def tearDown(self): remove_test_file(self.path) def test_simple(self): - nwbfile = NWBFile(' ', ' ', + nwbfile = NWBFile('sess_desc', 'identifier', datetime.now(tzlocal()), file_create_date=datetime.now(tzlocal()), institution='University of California, San Francisco', lab='Chang Lab') with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile) - with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: + with NWBHDF5IO(self.path, 'r') as reader: nwbfile = reader.read() + assert nwbfile.session_description == "sess_desc" class TestNoCacheSpec(TestCase): + """Test whether the file can be written and read when not caching the spec.""" def setUp(self): self.path = 'unittest_cached_spec.nwb' @@ -555,7 +558,7 @@ def tearDown(self): remove_test_file(self.path) def test_simple(self): - nwbfile = NWBFile(' ', ' ', + nwbfile = NWBFile('sess_desc', 'identifier', datetime.now(tzlocal()), file_create_date=datetime.now(tzlocal()), institution='University of California, San Francisco', @@ -563,8 +566,9 @@ def test_simple(self): with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile, cache_spec=False) - with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: + with NWBHDF5IO(self.path, 'r') as reader: nwbfile = reader.read() + assert nwbfile.session_description == "sess_desc" class TestTimestampsRefDefault(TestCase): diff --git a/tests/unit/test_misc.py b/tests/unit/test_misc.py index 99e0d6f87..9350d1d2e 100644 --- a/tests/unit/test_misc.py +++ b/tests/unit/test_misc.py @@ -33,7 +33,13 @@ def test_init(self): timestamps=[1., 2., 3.]) bands = 
DynamicTable(name='bands', description='band info for LFPSpectralAnalysis', columns=[ VectorData(name='band_name', description='name of bands', data=['alpha', 'beta', 'gamma']), - VectorData(name='band_limits', description='low and high cutoffs in Hz', data=np.ones((3, 2))) + VectorData(name='band_limits', description='low and high cutoffs in Hz', data=np.ones((3, 2))), + VectorData(name='band_mean', description='mean gaussian filters in Hz', data=np.ones((3,))), + VectorData( + name='band_stdev', + description='standard deviation of gaussian filters in Hz', + data=np.ones((3,)) + ), ]) spec_anal = DecompositionSeries(name='LFPSpectralAnalysis', description='my description', @@ -49,6 +55,8 @@ def test_init(self): np.testing.assert_equal(spec_anal.timestamps, [1., 2., 3.]) self.assertEqual(spec_anal.bands['band_name'].data, ['alpha', 'beta', 'gamma']) np.testing.assert_equal(spec_anal.bands['band_limits'].data, np.ones((3, 2))) + np.testing.assert_equal(spec_anal.bands['band_mean'].data, np.ones((3,))) + np.testing.assert_equal(spec_anal.bands['band_stdev'].data, np.ones((3,))) self.assertEqual(spec_anal.source_timeseries, timeseries) self.assertEqual(spec_anal.metric, 'amplitude') diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 74ce0992c..c2829ee1f 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -1,5 +1,6 @@ import subprocess import re +import sys from unittest.mock import patch from io import StringIO import warnings @@ -8,26 +9,35 @@ from pynwb import validate, NWBHDF5IO +# NOTE we use "coverage run -m pynwb.validate" instead of "python -m pynwb.validate" +# so that we can both test pynwb.validate and compute code coverage from that test. +# NOTE we also use "coverage run -p" which will generate a .coverage file with the +# machine name, process id, and a random number appended to the filename to +# simplify collecting and merging coverage data from multiple subprocesses. If "-p" +# is not used, then each "coverage run" will overwrite the .coverage file from a +# previous "coverage run". +# NOTE we run "coverage" as "{sys.executable} -m coverage" to make sure we use +# the same Python version; also, on Debian systems the executable is "python3-coverage", not +# just "coverage". +# NOTE the run_coverage.yml GitHub Action runs "python -m coverage combine" to +# combine the individual coverage reports into one .coverage file. +def run_coverage(extra_args: list[str]): + return subprocess.run( + [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validate"] + + extra_args, + capture_output=True + ) + + class TestValidateCLI(TestCase): # 1.0.2_nwbfile.nwb has no cached specifications # 1.0.3_nwbfile.nwb has cached "core" specification # 1.1.2_nwbfile.nwb has cached "core" and "hdmf-common" specifications - # NOTE we use "coverage run -m pynwb.validate" instead of "python -m pynwb.validate" - # so that we can both test pynwb.validate and compute code coverage from that test. - # NOTE we also use "coverage run -p" which will generate a .coverage file with the - # machine name, process id, and a random number appended to the filename to - # simplify collecting and merging coverage data from multiple subprocesses. if "-p" - # is not used, then each "coverage run" will overwrite the .coverage file from a - # previous "coverage run". - # NOTE the run_coverage.yml GitHub Action runs "python -m coverage combine" to - # combine the individual coverage reports into one .coverage file.
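Since run_coverage() above replaces many near-identical subprocess.run() calls in the tests that follow, a brief usage sketch may help. It is illustrative only: the fixture path and flag are taken from the tests below, and the final comment restates what the NOTE says the run_coverage.yml workflow does with the per-process data files.

```python
# Run the validator CLI through the helper and inspect the captured output,
# mirroring how the tests below check stdout/stderr.
result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--list-namespaces"])
assert result.stderr.decode("utf-8") == ""
print(result.stdout.decode("utf-8"))  # the namespaces cached in the fixture file

# Because of "coverage run -p", each call writes its own
# ".coverage.<machine>.<pid>.<random>" data file; CI merges them afterwards
# with "python -m coverage combine".
```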
- def test_validate_file_no_cache(self): """Test that validating a file with no cached spec against the core namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/1.0.2_nwbfile.nwb"], capture_output=True) + result = run_coverage(["tests/back_compat/1.0.2_nwbfile.nwb"]) stderr_regex = re.compile( r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. " @@ -42,8 +52,7 @@ def test_validate_file_no_cache(self): def test_validate_file_no_cache_bad_ns(self): """Test that validating a file with no cached spec against a specified, unknown namespace fails.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.0.2_nwbfile.nwb", - "--ns", "notfound"], capture_output=True) + result = run_coverage(["tests/back_compat/1.0.2_nwbfile.nwb", "--ns", "notfound"]) stderr_regex = re.compile( r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. " @@ -57,8 +66,7 @@ def test_validate_file_no_cache_bad_ns(self): def test_validate_file_cached(self): """Test that validating a file with cached spec against its cached namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/1.1.2_nwbfile.nwb"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -69,8 +77,7 @@ def test_validate_file_cached(self): def test_validate_file_cached_bad_ns(self): """Test that validating a file with cached spec against a specified, unknown namespace fails.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/1.1.2_nwbfile.nwb", "--ns", "notfound"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--ns", "notfound"]) stderr_regex = re.compile( r"The namespace 'notfound' could not be found in cached namespace information as only " @@ -82,8 +89,7 @@ def test_validate_file_cached_bad_ns(self): def test_validate_file_cached_extension(self): """Test that validating a file with cached spec against the cached namespaces succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/2.1.0_nwbfile_with_extension.nwb"], capture_output=True) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -94,9 +100,7 @@ def test_validate_file_cached_extension(self): def test_validate_file_cached_extension_pass_ns(self): """Test that validating a file with cached spec against the extension namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/2.1.0_nwbfile_with_extension.nwb", - "--ns", "ndx-testextension"], capture_output=True) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb", "--ns", "ndx-testextension"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -107,9 +111,7 @@ def test_validate_file_cached_extension_pass_ns(self): def test_validate_file_cached_core(self): """Test that validating a file with cached spec against the core namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/2.1.0_nwbfile_with_extension.nwb", - "--ns", "core"], capture_output=True) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb", "--ns", "core"]) stdout_regex = 
re.compile( r"The namespace 'core' is included by the namespace 'ndx-testextension'. " @@ -119,8 +121,7 @@ def test_validate_file_cached_core(self): def test_validate_file_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.1.2_nwbfile.nwb", - "--ns", "hdmf-common"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--ns", "hdmf-common"]) stderr_regex = re.compile( r"The namespace 'hdmf-common' is included by the namespace 'core'\. Please validate against that " @@ -130,8 +131,7 @@ def test_validate_file_cached_hdmf_common(self): def test_validate_file_cached_ignore(self): """Test that validating a file with cached spec against the core namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.1.2_nwbfile.nwb", - "--no-cached-namespace"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--no-cached-namespace"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -142,13 +142,7 @@ def test_validate_file_cached_ignore(self): def test_validate_file_invalid(self): """Test that validating an invalid file outputs errors.""" - result = subprocess.run( - [ - "coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.0.2_str_experimenter.nwb", - "--no-cached-namespace" - ], - capture_output=True - ) + result = run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace"]) stderr_regex = re.compile( r" - found the following errors:\s*" @@ -164,13 +158,7 @@ def test_validate_file_invalid(self): def test_validate_file_list_namespaces_core(self): """Test listing namespaces from a file""" - result = subprocess.run( - [ - "coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.1.2_nwbfile.nwb", - "--list-namespaces" - ], - capture_output=True - ) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--list-namespaces"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -179,13 +167,7 @@ def test_validate_file_list_namespaces_core(self): def test_validate_file_list_namespaces_extension(self): """Test listing namespaces from a file with an extension""" - result = subprocess.run( - [ - "coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/2.1.0_nwbfile_with_extension.nwb", - "--list-namespaces" - ], - capture_output=True - ) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb", "--list-namespaces"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -199,64 +181,54 @@ class TestValidateFunction(TestCase): # 1.0.3_nwbfile.nwb has cached "core" specification # 1.1.2_nwbfile.nwb has cached "core" and "hdmf-common" specificaitions + def get_io(self, path): + """Get an NWBHDF5IO object for the given path, ignoring the warning about ignoring cached namespaces.""" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + return NWBHDF5IO(str(path), 'r') + def test_validate_io_no_cache(self): """Test that validating a file with no cached spec against the core namespace succeeds.""" - with NWBHDF5IO('tests/back_compat/1.0.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: errors = validate(io) self.assertEqual(errors, []) def test_validate_io_no_cache_bad_ns(self): """Test that 
validating a file with no cached spec against a specified, unknown namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.0.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""): validate(io, 'notfound') def test_validate_io_cached(self): """Test that validating a file with cached spec against its cached namespace succeeds.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: errors = validate(io) self.assertEqual(errors, []) def test_validate_io_cached_extension(self): """Test that validating a file with cached spec against its cached namespaces succeeds.""" - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", - message=r"Ignoring cached namespace .*", - category=UserWarning, - ) - with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: - errors = validate(io) - self.assertEqual(errors, []) + with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + errors = validate(io) + self.assertEqual(errors, []) def test_validate_io_cached_extension_pass_ns(self): """Test that validating a file with cached extension spec against the extension namespace succeeds.""" - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", - message=r"Ignoring cached namespace .*", - category=UserWarning, - ) - with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: - errors = validate(io, 'ndx-testextension') - self.assertEqual(errors, []) + with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + errors = validate(io, 'ndx-testextension') + self.assertEqual(errors, []) def test_validate_io_cached_core_with_io(self): """ For back-compatability, test that validating a file with cached extension spec against the core namespace succeeds when using the `io` + `namespace` keywords. 
""" - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", - message=r"Ignoring cached namespace .*", - category=UserWarning, - ) - with NWBHDF5IO( - path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb', mode='r', load_namespaces=True - ) as io: - results = validate(io=io, namespace="core") - self.assertEqual(results, []) + with self.get_io(path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + results = validate(io=io, namespace="core") + self.assertEqual(results, []) def test_validate_file_cached_extension(self): """ @@ -310,13 +282,13 @@ def test_validate_file_cached_no_cache_bad_ns(self): def test_validate_io_cached_bad_ns(self): """Test that validating a file with cached spec against a specified, unknown namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""): validate(io, 'notfound') def test_validate_io_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: # TODO this error should not be different from the error when using the validate script above msg = "builder must have data type defined with attribute 'data_type'" with self.assertRaisesWith(ValueError, msg):