diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3542c3f8c..562162300 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -12,6 +12,6 @@ Show how to reproduce the new behavior (can be a bug fix or a new feature) - [ ] Did you update CHANGELOG.md with your changes? - [ ] Have you checked our [Contributing](https://github.com/NeurodataWithoutBorders/pynwb/blob/dev/docs/CONTRIBUTING.rst) document? - [ ] Have you ensured the PR clearly describes the problem and the solution? -- [ ] Is your contribution compliant with our coding style? This can be checked running `flake8` from the source directory. +- [ ] Is your contribution compliant with our coding style? This can be checked running `ruff check . && codespell` from the source directory. - [ ] Have you checked to ensure that there aren't other open [Pull Requests](https://github.com/NeurodataWithoutBorders/pynwb/pulls) for the same change? - [ ] Have you included the relevant issue number using "Fix #XXX" notation where XXX is the issue number? By including "Fix #XXX" you allow GitHub to close issue #XXX when the PR is merged. diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 4e8ea2418..a1b1f75dd 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -38,8 +38,9 @@ jobs: - { name: windows-python3.12 , test-tox-env: py312 , build-tox-env: build-py312 , python-ver: "3.12", os: windows-latest } - { name: windows-python3.12-upgraded , test-tox-env: py312-upgraded , build-tox-env: build-py312-upgraded , python-ver: "3.12", os: windows-latest } - { name: windows-python3.12-prerelease, test-tox-env: py312-prerelease, build-tox-env: build-py312-prerelease, python-ver: "3.11", os: windows-latest } + # minimum versions of dependencies do not have wheels or cannot be built on macos-arm64 - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-13 } - - { name: macos-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: macos-13 } + - { name: macos-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: macos-latest } - { name: macos-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: macos-latest } - { name: macos-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } - { name: macos-python3.11-opt , test-tox-env: py311-optional , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } @@ -98,6 +99,7 @@ jobs: - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: windows-latest } - { name: windows-gallery-python3.12-prerelease, test-tox-env: gallery-py312-prerelease, python-ver: "3.12", os: windows-latest } + # minimum versions of dependencies do not have wheels or cannot be built on macos-arm64 - { name: macos-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: macos-13 } - { name: macos-gallery-python3.12-upgraded , test-tox-env: gallery-py312-upgraded , python-ver: "3.12", os: macos-latest } - { name: macos-gallery-python3.12-prerelease , test-tox-env: gallery-py312-prerelease, python-ver: "3.12", os: macos-latest } @@ -201,7 +203,7 @@ jobs: include: - { 
name: conda-linux-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest } - { name: conda-windows-python3.12-ros3, python-ver: "3.12", os: windows-latest } - - { name: conda-macos-python3.12-ros3 , python-ver: "3.12", os: macos-13 } # This is due to DANDI not supporting osx-arm64. Will support macos-latest when this changes. + - { name: conda-macos-python3.12-ros3 , python-ver: "3.12", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -248,7 +250,7 @@ jobs: include: - { name: conda-linux-gallery-python3.12-ros3 , python-ver: "3.12", os: ubuntu-latest } - { name: conda-windows-gallery-python3.12-ros3, python-ver: "3.12", os: windows-latest } - - { name: conda-macos-gallery-python3.12-ros3 , python-ver: "3.12", os: macos-13 } # This is due to DANDI not supporting osx-arm64. Will support macos-latest when this changes. + - { name: conda-macos-gallery-python3.12-ros3 , python-ver: "3.12", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -273,7 +275,6 @@ jobs: - name: Install run dependencies run: | - pip install matplotlib pip install . pip list diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index b512b2de0..e365d78cf 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -24,6 +24,7 @@ jobs: - { name: linux-python3.12-upgraded , test-tox-env: py312-upgraded , build-tox-env: build-py312-upgraded , python-ver: "3.12", os: ubuntu-latest , upload-wheels: true } - { name: windows-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.12-upgraded , test-tox-env: py312-upgraded , build-tox-env: build-py312-upgraded , python-ver: "3.12", os: windows-latest } + # minimum versions of dependencies do not have wheels or cannot be built on macos-arm64 - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-13 } steps: - name: Cancel non-latest runs @@ -63,7 +64,7 @@ jobs: - name: Upload distribution as a workspace artifact if: ${{ matrix.upload-wheels }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: distributions path: dist @@ -282,7 +283,7 @@ jobs: python-version: '3.12' - name: Download wheel and source distributions from artifact - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: distributions path: dist diff --git a/.gitignore b/.gitignore index c0a2aca3e..95f08686e 100644 --- a/.gitignore +++ b/.gitignore @@ -77,3 +77,6 @@ tests/coverage/htmlcov # Version _version.py + +.core_typemap_version +core_typemap.pkl diff --git a/CHANGELOG.md b/CHANGELOG.md index cee64c308..e5909f577 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,35 @@ # PyNWB Changelog -## PyNWB 2.8.1 (Upcoming) +## PyNWB 2.8.3 (Upcoming) + +### Performance +- Cache global type map to speed import 3X. @sneakers-the-rat [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931) + +## PyNWB 2.8.2 (September 9, 2024) + +### Enhancements and minor changes +- Added support for numpy 2.0. 
@mavaylon1 [#1956](https://github.com/NeurodataWithoutBorders/pynwb/pull/1956) +- Make `get_cached_namespaces_to_validate` a public function @stephprince [#1961](https://github.com/NeurodataWithoutBorders/pynwb/pull/1961) + +### Documentation and tutorial enhancements +- Added pre-release pull request instructions to release process documentation @stephprince [#1928](https://github.com/NeurodataWithoutBorders/pynwb/pull/1928) +- Added section on how to use the `family` driver in `h5py` for splitting data across multiple files @oruebel [#1949](https://github.com/NeurodataWithoutBorders/pynwb/pull/1949) + +### Bug fixes +- Fixed `can_read` method to return False if no nwbfile version can be found @stephprince [#1934](https://github.com/NeurodataWithoutBorders/pynwb/pull/1934) +- Changed `epoch_tags` to be a NWBFile property instead of constructor argument. @stephprince [#1935](https://github.com/NeurodataWithoutBorders/pynwb/pull/1935) +- Exposed option to not cache the spec in `NWBHDF5IO.export`. @rly [#1959](https://github.com/NeurodataWithoutBorders/pynwb/pull/1959) + +## PyNWB 2.8.1 (July 3, 2024) ### Documentation and tutorial enhancements - Simplified the introduction to NWB tutorial. @rly [#1914](https://github.com/NeurodataWithoutBorders/pynwb/pull/1914) - Simplified the ecephys and ophys tutorials. [#1915](https://github.com/NeurodataWithoutBorders/pynwb/pull/1915) +- Add comments to `src/pynwb/io/file.py` to improve developer documentation. @rly [#1925](https://github.com/NeurodataWithoutBorders/pynwb/pull/1925) ### Bug fixes - Fixed use of `channel_conversion` in `TimeSeries` `get_data_in_units`. @rohanshah [1923](https://github.com/NeurodataWithoutBorders/pynwb/pull/1923) - ## PyNWB 2.8.0 (May 28, 2024) ### Enhancements and minor changes diff --git a/README.rst b/README.rst index d5d99789a..408446cff 100644 --- a/README.rst +++ b/README.rst @@ -49,10 +49,10 @@ Overall Health :target: https://github.com/neurodatawithoutborders/pynwb/blob/dev/license.txt :alt: PyPI - License -**Conda** +**Conda Feedstock** -.. image:: https://circleci.com/gh/conda-forge/pynwb-feedstock.svg?style=shield - :target: https://circleci.com/gh/conda-forge/pynwb-feedstock +.. image:: https://dev.azure.com/conda-forge/feedstock-builds/_apis/build/status/pynwb-feedstock?branchName=main + :target: https://dev.azure.com/conda-forge/feedstock-builds/_build/latest?definitionId=5703&branchName=main :alt: Conda Feedstock Status NWB Format API diff --git a/docs/gallery/advanced_io/plot_iterative_write.py b/docs/gallery/advanced_io/plot_iterative_write.py index 958981a0b..bb629e14d 100644 --- a/docs/gallery/advanced_io/plot_iterative_write.py +++ b/docs/gallery/advanced_io/plot_iterative_write.py @@ -1,4 +1,6 @@ """ +.. _iterative_write: + Iterative Data Write ==================== diff --git a/docs/gallery/advanced_io/linking_data.py b/docs/gallery/advanced_io/plot_linking_data.py similarity index 60% rename from docs/gallery/advanced_io/linking_data.py rename to docs/gallery/advanced_io/plot_linking_data.py index 2f79d1488..00dfe5056 100644 --- a/docs/gallery/advanced_io/linking_data.py +++ b/docs/gallery/advanced_io/plot_linking_data.py @@ -13,7 +13,7 @@ HDF5 files with NWB data files via external links. To make things more concrete, let's look at the following use case. We want to simultaneously record multiple data streams during data acquisition. 
Using the concept of external links allows us to save each data stream to an external HDF5 files during data acquisition and to -afterwards link the data into a single NWB file. In this case, each recording becomes represented by a +afterward link the data into a single NWB file. In this case, each recording becomes represented by a separate file-system object that can be set as read-only once the experiment is done. In the following we are using :py:meth:`~pynwb.base.TimeSeries` as an example, but the same approach works for other NWBContainers as well. @@ -42,7 +42,7 @@ Creating test data ---------------------------- +^^^^^^^^^^^^^^^^^^ In the following we are creating two :py:meth:`~pynwb.base.TimeSeries` each written to a separate file. We then show how we can integrate these files into a single NWBFile. @@ -61,7 +61,7 @@ # Create the base data start_time = datetime(2017, 4, 3, 11, tzinfo=tzlocal()) data = np.arange(1000).reshape((100, 10)) -timestamps = np.arange(100) +timestamps = np.arange(100, dtype=float) filename1 = "external1_example.nwb" filename2 = "external2_example.nwb" filename3 = "external_linkcontainer_example.nwb" @@ -105,12 +105,12 @@ ##################### # Linking to select datasets -# -------------------------- +# ^^^^^^^^^^^^^^^^^^^^^^^^^^ # #################### # Step 1: Create the new NWBFile -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Create the first file nwbfile4 = NWBFile( @@ -122,7 +122,7 @@ #################### # Step 2: Get the dataset you want to link to -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Now let's open our test files and retrieve our timeseries. # @@ -134,7 +134,7 @@ #################### # Step 3: Create the object you want to link to the data -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # To link to the dataset we can simply assign the data object (here `` timeseries_1.data``) to a new ``TimeSeries`` @@ -167,7 +167,7 @@ #################### # Step 4: Write the data -# ^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~ # with NWBHDF5IO(filename4, "w") as io4: # Use link_data=True to specify default behavior to link rather than copy data @@ -185,7 +185,7 @@ #################### # Linking to whole Containers -# --------------------------- +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # Appending to files and linking is made possible by passing around the same # :py:class:`~hdmf.build.manager.BuildManager`. You can get a manager to pass around @@ -203,7 +203,7 @@ #################### # Step 1: Get the container object you want to link to -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Now let's open our test files and retrieve our timeseries. # @@ -219,7 +219,7 @@ #################### # Step 2: Add the container to another NWBFile -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # To integrate both :py:meth:`~pynwb.base.TimeSeries` into a single file we simply create a new # :py:meth:`~pynwb.file.NWBFile` and add our existing :py:meth:`~pynwb.base.TimeSeries` to it. 
PyNWB's # :py:class:`~pynwb.NWBHDF5IO` backend then automatically detects that the TimeSeries have already @@ -247,7 +247,7 @@ # ------------------------------ # # Using the :py:func:`~pynwb.file.NWBFile.copy` method allows us to easily create a shallow copy -# of a whole NWB:N file with links to all data in the original file. For example, we may want to +# of a whole NWB file with links to all data in the original file. For example, we may want to # store processed data in a new file separate from the raw data, while still being able to access # the raw data. See the :ref:`scratch` tutorial for a detailed example. # @@ -259,5 +259,128 @@ # External links are convenient but to share data we may want to hand a single file with all the # data to our collaborator rather than having to collect all relevant files. To do this, # :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO` (and in turn :py:class:`~pynwb.NWBHDF5IO`) -# provide the convenience function :py:meth:`~hdmf.backends.hdf5.h5tools.HDF5IO.copy_file`, -# which copies an HDF5 file and resolves all external links. +# provide the convenience function :py:meth:`~hdmf.backends.hdf5.h5tools.HDF5IO.export`, +# which can copy the file and resolve all external links. + + +#################### +# Automatically splitting large data across multiple HDF5 files +# ------------------------------------------------------------------- +# +# For extremely large datasets it can be useful to split data across multiple files, e.g., in cases where +# the file system does not allow for large files. While we can achieve this by writing different +# components (e.g., :py:meth:`~pynwb.base.TimeSeries`) to different files as described above, +# this option does not allow splitting data from single datasets. An alternative option is to use the +# ``family`` driver in ``h5py`` to automatically split the NWB file into a collection of many HDF5 files. +# The ``family`` driver stores the file on disk as a series of fixed-length chunks (each in its own file). +# In practice, to write very large arrays, we can combine this approach with :ref:`iterative_write` to +# avoid having to load all data into memory. In the example shown here we use a manual approach to +# iterative write by using :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` to create an empty dataset and +# then filling in the data afterward.
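As a rough sketch of the combination mentioned above (a hypothetical variant, not part of this changeset and not the step-by-step approach shown below), the ``family`` driver can also be paired with HDMF's ``DataChunkIterator`` so the data are streamed during a single write call instead of being resized and filled manually; file names, sizes, and shapes here are illustrative only.

from datetime import datetime
from uuid import uuid4

import h5py
import numpy as np
from hdmf.data_utils import DataChunkIterator

from pynwb import NWBFile, NWBHDF5IO
from pynwb.base import TimeSeries


def stream_rows(n_rows=10000, n_channels=10):
    # yield one row at a time so the full array never has to live in memory
    for _ in range(n_rows):
        yield np.random.rand(n_channels).astype(np.float32)


nwbfile = NWBFile(
    session_description="family driver combined with iterative write (sketch)",
    identifier=str(uuid4()),
    session_start_time=datetime.now().astimezone(),
)
nwbfile.add_acquisition(
    TimeSeries(
        name="example_timeseries",
        # wrap the generator so rows are pulled on demand during write
        data=DataChunkIterator(data=stream_rows(), maxshape=(None, 10), buffer_size=1000),
        starting_time=0.0,
        rate=1.0,
        unit="mV",
    )
)

# illustrative values: 1 MB member files, printf-style file name pattern
with h5py.File(name="family_iterative_%d.nwb", mode="w", driver="family", memb_size=1024**2) as f:
    with NWBHDF5IO(file=f, mode="w") as io:
        io.write(nwbfile)  # rows are consumed from the iterator as they are written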
+ +#################### +# Step 1: Create the NWBFile as usual +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +from pynwb import NWBFile +from pynwb.base import TimeSeries +from datetime import datetime +from uuid import uuid4 +from hdmf.backends.hdf5 import H5DataIO +import numpy as np + +# Create an NWBFile object +nwbfile = NWBFile(session_description='example file family', + identifier=str(uuid4()), + session_start_time=datetime.now().astimezone()) + +# Create the data as an empty dataset so that we can write to it later +data = H5DataIO(maxshape=(None, 10), # make the first dimension expandable + dtype=np.float32, # create the data as float32 + shape=(0, 10), # initial data shape to initialize as empty dataset + chunks=(1000, 10) + ) + +# Create a TimeSeries object +time_series = TimeSeries(name='example_timeseries', + data=data, + starting_time=0.0, + rate=1.0, + unit='mV') + +# Add the TimeSeries to the NWBFile +nwbfile.add_acquisition(time_series) + +#################### +# Step 2: Open the new file with the `family` driver and write +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# Here we need to open the file with `h5py` first to set up the driver, and then we can use +# that file with :py:class:`pynwb.NWBHDF5IO`. This is required because :py:class:`pynwb.NWBHDF5IO` +# currently does not support passing the `memb_size` option required by the `family` driver. + +import h5py +from pynwb import NWBHDF5IO + +# Define the size of the individual files, determining the number of files to create +# chunk_size = 1 * 1024**3 # 1GB per file +chunk_size = 1024**2 # 1MB just for testing + +# filename pattern +filename_pattern = 'family_nwb_file_%d.nwb' + +# Create the HDF5 file using the family driver +with h5py.File(name=filename_pattern, mode='w', driver='family', memb_size=chunk_size) as f: + + # Use NWBHDF5IO to write the NWBFile to the HDF5 file + with NWBHDF5IO(file=f, mode='w') as io: + io.write(nwbfile) + + # Write new data iteratively to the file + for i in range(10): + start_index = i * 1000 + stop_index = start_index + 1000 + data.dataset.resize((stop_index, 10)) # Resize the dataset + data.dataset[start_index:stop_index, :] = i # Set the additional values +#################### +# .. note:: +# +# Alternatively, we could have also used the :ref:`iterative_write` features to write the data +# iteratively directly as part of the `io.write` call instead of manually afterward. + +#################### +# Step 3: Read a file written with the family driver +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# + + +# Open the HDF5 file using the family driver +with h5py.File(name=filename_pattern, mode='r', driver='family', memb_size=chunk_size) as f: + # Use NWBHDF5IO to read the NWBFile from the HDF5 file + with NWBHDF5IO(file=f, manager=None, mode='r') as io: + nwbfile = io.read() + print(nwbfile) + + +#################### +# .. note:: +# +# The filename you provide when using the ``family`` driver must contain a printf-style integer format code +# (e.g., ``%d``), which will be replaced by the file sequence number. +# +# .. note:: +# +# The ``memb_size`` parameter must be set on both write and read. As such, reading the file requires +# the user to know the ``memb_size`` that was used for writing. +# +# .. warning:: +# +# The DANDI archive may not support NWB files that are split in this fashion. +# +# .. note:: +# +# Other file drivers, e.g., ``split`` or ``multi`` could be used in a similar fashion.
+# However, not all HDF5 drivers are supported by the high-level API of +# ``h5py`` and as such may require a bit more complex setup via the +# low-level HDF5 API in ``h5py``. +# + diff --git a/docs/gallery/domain/images.py b/docs/gallery/domain/images.py index d6eef24b3..4e9214784 100644 --- a/docs/gallery/domain/images.py +++ b/docs/gallery/domain/images.py @@ -11,7 +11,8 @@ about the subject, the environment, the presented stimuli, or other parts related to the experiment. This tutorial focuses in particular on the usage of: -* :py:class:`~pynwb.image.OpticalSeries` for series of images that were presented as stimulus +* :py:class:`~pynwb.image.OpticalSeries` and :py:class:`~pynwb.misc.AbstractFeatureSeries` for series of images that + were presented as stimulus * :py:class:`~pynwb.image.ImageSeries`, for series of images (movie segments); * :py:class:`~pynwb.image.GrayscaleImage`, :py:class:`~pynwb.image.RGBImage`, :py:class:`~pynwb.image.RGBAImage`, for static images; @@ -19,23 +20,22 @@ The following examples will reference variables that may not be defined within the block they are used in. For clarity, we define them here: """ -# Define file paths used in the tutorial - -import os # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnails_image_data.png' from datetime import datetime +import os from uuid import uuid4 import numpy as np from dateutil import tz -from dateutil.tz import tzlocal from PIL import Image from pynwb import NWBHDF5IO, NWBFile from pynwb.base import Images from pynwb.image import GrayscaleImage, ImageSeries, OpticalSeries, RGBAImage, RGBImage +from pynwb.misc import AbstractFeatureSeries +# Define file paths used in the tutorial nwbfile_path = os.path.abspath("images_tutorial.nwb") moviefiles_path = [ os.path.abspath("image/file_1.tiff"), @@ -55,7 +55,7 @@ nwbfile = NWBFile( session_description="my first synthetic recording", identifier=str(uuid4()), - session_start_time=datetime.now(tzlocal()), + session_start_time=session_start_time, experimenter=[ "Baggins, Bilbo", ], @@ -109,6 +109,35 @@ nwbfile.add_stimulus(stimulus=optical_series) #################### +# AbstractFeatureSeries: Storing features of visual stimuli +# ---------------------------------------------------------- +# +# While it is usually recommended to store the entire image data as an :py:class:`~pynwb.image.OpticalSeries`, sometimes +# it is useful to store features of the visual stimuli instead of or in addition to the raw image data. For example, +# you may want to store the mean luminance of the image, the contrast, or the spatial frequency. This can be done using +# an instance of :py:class:`~pynwb.misc.AbstractFeatureSeries`. This class is a general container for storing time +# series of features that are derived from the raw image data.
+ +# Create some fake feature data +feature_data = np.random.rand(200, 3) # 200 time points, 3 features + +# Create an AbstractFeatureSeries object +abstract_feature_series = AbstractFeatureSeries( + name="StimulusFeatures", + data=feature_data, + timestamps=np.linspace(0, 1, 200), + description="Features of the visual stimuli", + features=["luminance", "contrast", "spatial frequency"], + feature_units=["n.a.", "n.a.", "cycles/degree"], +) + +# Add the AbstractFeatureSeries to the NWBFile +nwbfile.add_stimulus(abstract_feature_series) + +#################### +# Like all :py:class:`~pynwb.base.TimeSeries`, :py:class:`~pynwb.misc.AbstractFeatureSeries` specify timing using +# either the ``rate`` and ``starting_time`` attributes or the ``timestamps`` attribute. +# # ImageSeries: Storing series of images as acquisition # ---------------------------------------------------- # @@ -118,7 +147,6 @@ # # We can add raw data to the :py:class:`~pynwb.file.NWBFile` object as *acquisition* using # the :py:meth:`~pynwb.file.NWBFile.add_acquisition` method. -# image_data = np.random.randint(low=0, high=255, size=(200, 50, 50, 3), dtype=np.uint8) behavior_images = ImageSeries( @@ -138,13 +166,13 @@ # ^^^^^^^^^^^^^^ # # External files (e.g. video files of the behaving animal) can be added to the :py:class:`~pynwb.file.NWBFile` -# by creating an :py:class:`~pynwb.image.ImageSeries` object using the +# by creating an :py:class:`~pynwb.image.ImageSeries` object using the # :py:attr:`~pynwb.image.ImageSeries.external_file` attribute that specifies # the path to the external file(s) on disk. # The file(s) path must be relative to the path of the NWB file. # Either ``external_file`` or ``data`` must be specified, but not both. # -# If the sampling rate is constant, use :py:attr:`~pynwb.base.TimeSeries.rate` and +# If the sampling rate is constant, use :py:attr:`~pynwb.base.TimeSeries.rate` and # :py:attr:`~pynwb.base.TimeSeries.starting_time` to specify time. # For irregularly sampled recordings, use :py:attr:`~pynwb.base.TimeSeries.timestamps` to specify time for each sample # image. @@ -152,7 +180,7 @@ # Each external image may contain one or more consecutive frames of the full :py:class:`~pynwb.image.ImageSeries`. # The :py:attr:`~pynwb.image.ImageSeries.starting_frame` attribute serves as an index to indicate which frame # each file contains. -# For example, if the ``external_file`` dataset has three paths to files and the first and the second file have 2 +# For example, if the ``external_file`` dataset has three paths to files and the first and the second file have 2 # frames, and the third file has 3 frames, then this attribute will have values `[0, 2, 4]`. external_file = [ diff --git a/docs/gallery/general/plot_configurator.py b/docs/gallery/general/plot_configurator.py index 52a2a6326..0156a8fef 100644 --- a/docs/gallery/general/plot_configurator.py +++ b/docs/gallery/general/plot_configurator.py @@ -55,6 +55,7 @@ from pynwb import NWBFile, get_loaded_type_config, load_type_config, unload_type_config from pynwb.file import Subject +#################################### # How to use a Configuration file # ------------------------------- # As mentioned prior, the first step after creating a configuration file is @@ -69,7 +70,10 @@ # the value of the fields are wrapped and then validated to see if it is a # permissible value in their respective :py:class:`~hdmf.term_set.TermSet`. 
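The load/validate/unload cycle described above can be summarized in a short sketch (hedged: the config file name is illustrative, and the hunk that follows shows the path handling actually used in the tutorial):

from pynwb import load_type_config, get_loaded_type_config, unload_type_config

# load a term-set validation configuration (illustrative path)
load_type_config(config_path="nwb_gallery_config.yaml")

# inspect which types and fields are currently being validated
print(get_loaded_type_config())

# ... construct NWBFile / Subject objects here; configured fields are wrapped and checked ...

# turn configuration-based validation back off when finished
unload_type_config()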
-dir_path = os.path.dirname(os.path.abspath("__file__")) +try: + dir_path = os.path.dirname(os.path.abspath(__file__)) # when running as a .py +except NameError: + dir_path = os.path.dirname(os.path.abspath("__file__")) # when running as a script or notebook yaml_file = os.path.join(dir_path, 'nwb_gallery_config.yaml') load_type_config(config_path=yaml_file) diff --git a/docs/gallery/general/plot_timeintervals.py b/docs/gallery/general/plot_timeintervals.py index 4069fd4a4..905b3213c 100644 --- a/docs/gallery/general/plot_timeintervals.py +++ b/docs/gallery/general/plot_timeintervals.py @@ -92,7 +92,8 @@ # Additional columns can be added using :py:meth:`~pynwb.file.NWBFile.add_trial_column`. This method takes a name # for the column and a description of what the column stores. You do not need to supply data # type, as this will inferred. Once all columns have been added, trial data can be populated using -# :py:meth:`~pynwb.file.NWBFile.add_trial`. +# :py:meth:`~pynwb.file.NWBFile.add_trial`. Note that if you add a custom column, you must +# add at least one row to write the table to a file. # # Lets add an additional column and some trial data with tags and timeseries references. diff --git a/docs/source/export.rst b/docs/source/export.rst index 490cd346e..218184f9b 100644 --- a/docs/source/export.rst +++ b/docs/source/export.rst @@ -53,14 +53,12 @@ on the :py:class:`~pynwb.file.NWBFile` before exporting. How do I create a copy of an NWB file with different data layouts (e.g., applying compression)? --------------------------------------------------------------------------------------------------------- -Use the `h5repack `_ command line tool from the HDF5 Group. -See also this `h5repack tutorial `_. +Use the `h5repack `_ command line tool from the HDF5 Group. How do I create a copy of an NWB file with different controls over how links are treated and whether copies are deep or shallow? --------------------------------------------------------------------------------------------------------------------------------- -Use the `h5copy `_ command line tool from the HDF5 Group. -See also this `h5copy tutorial `_. +Use the `h5copy `_ command line tool from the HDF5 Group. How do I generate new object IDs for a newly exported NWB file? @@ -101,8 +99,8 @@ For example: export_io.export(src_io=read_io, nwbfile=nwbfile, write_args={'link_data': False}) # copy linked datasets # the written file will contain no links to external datasets -You can also the `h5copy `_ command line tool \ -from the HDF5 Group. See also this `h5copy tutorial `_. +You can also the `h5copy `_ command line tool \ +from the HDF5 Group. How do I write a newly instantiated ``NWBFile`` to two different file paths? diff --git a/docs/source/make_a_release.rst b/docs/source/make_a_release.rst index f88b627ea..ad258db34 100644 --- a/docs/source/make_a_release.rst +++ b/docs/source/make_a_release.rst @@ -20,25 +20,6 @@ Prerequisites * You have a `GPG signing key`_. -* Dependency versions in ``requirements.txt``, ``requirements-dev.txt``, ``requirements-opt.txt``, - ``requirements-doc.txt``, and ``requirements-min.txt`` are up-to-date. - -* Legal information and copyright dates in ``Legal.txt``, ``license.txt``, ``README.rst``, - ``docs/source/conf.py``, and any other files are up-to-date. - -* Package information in ``setup.py`` is up-to-date. - -* ``README.rst`` information is up-to-date. - -* The ``nwb-schema`` submodule is up-to-date. 
The version number should be checked manually in case syncing the - git submodule does not work as expected. - -* Documentation reflects any new features and changes in PyNWB functionality. - -* Documentation builds locally. - -* Documentation builds on the `ReadTheDocs project`_ on the "dev" build. - * Release notes have been prepared. * An appropriate new version number has been selected. @@ -56,6 +37,44 @@ Commands to evaluate starts with a dollar sign. For example:: means that ``echo "Hello"`` should be copied and evaluated in the terminal. +----------------------------------------------------- +Make pre-release pull request on GitHub: Step-by-step +----------------------------------------------------- + +1. Create a new branch locally or on GitHub. Update the ``CHANGELOG.md`` with the release date. + + .. code:: + + $ git checkout -b release-X.Y.Z + + +2. Create a pull request for the new release branch, then append the URL with: "&template=release.md". +For example, ``https://github.com/NeurodataWithoutBorders/pynwb/compare/dev...release-X.Y.Z?quick_pull=1&template=release.md`` + + +3. Follow the checklist in the template. The checklist covers the following steps in more detail: + + * Make sure all PRs to be included in this release have been merged to ``dev``. + + * Update package versions in ``requirements.txt``, ``requirements-dev.txt``, ``requirements-opt.txt``, + ``requirements-doc.txt``, ``requirements-min.txt``, ``environment-ros3.yml``, and ``pyproject.toml``. + + * Check legal information and copyright dates in ``Legal.txt``, ``license.txt``, ``README.rst``, + ``docs/source/conf.py``. + + * Update ``pyproject.toml`` and ``README.rst`` as needed. + + * Update ``src/pynwb/nwb-schema`` submodule as needed. Check the version number manually to make sure + we are using the latest release. + + * Update documentation to reflect new features and changes in PyNWB functionality. + + * Run tests locally, inspect all warnings and outputs, and try to remove all warnings. + + * Test documentation builds locally and on the `ReadTheDocs project`_ on the "dev" build. + + + ------------------------------------- Publish release on PyPI: Step-by-step @@ -144,7 +163,7 @@ Publish release on PyPI: Step-by-step python -c "import pynwb; print(pynwb.__version__)" -10. Cleanup +12. 
Cleanup On bash/zsh: diff --git a/environment-ros3.yml b/environment-ros3.yml index 21dcc5a9c..081408f19 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -6,17 +6,18 @@ channels: dependencies: - python==3.12 - h5py==3.11.0 - - hdmf==3.14.0 - - matplotlib==3.8.0 - - numpy==1.26.4 + - hdmf==3.14.3 + - matplotlib==3.8.4 + - numpy==2.1.1 - pandas==2.2.2 - python-dateutil==2.9.0 - setuptools - - pytest==7.4.3 # This is for the upcoming pytest update - - dandi==0.60.0 # NOTE: dandi does not support osx-arm64 - - fsspec==2024.2.0 - - requests==2.31.0 - - aiohttp==3.9.3 + - pytest==7.4.3 # pin to pytest < 8 because of incompatibilities to be addressed + - fsspec==2024.6.0 + - requests==2.32.3 + - aiohttp==3.9.5 - pip - pip: - - remfile==0.1.11 + - remfile==0.1.13 + - dandi==0.62.1 # NOTE: dandi is not available on conda for osx-arm64 + diff --git a/pyproject.toml b/pyproject.toml index 77d33e352..f798f2b5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,8 @@ authors = [ { name="Ryan Ly", email="rly@lbl.gov" }, { name="Oliver Ruebel", email="oruebel@lbl.gov" }, { name="Ben Dichter", email="ben.dichter@gmail.com" }, - { name="Matthew Avaylon", email="mavaylon@lbl.gov" } + { name="Matthew Avaylon", email="mavaylon@lbl.gov" }, + { name="Stephanie Prince", email="smprince@lbl.gov" }, ] description= "Package for working with Neurodata stored in the NWB format." readme = "README.rst" @@ -33,8 +34,8 @@ classifiers = [ ] dependencies = [ "h5py>=2.10", - "hdmf>=3.14.0", - "numpy>=1.18, <2.0", # pin below 2.0 until HDMF supports numpy 2.0 + "hdmf>=3.14.3", + "numpy>=1.18", "pandas>=1.1.5", "python-dateutil>=2.7.3", ] @@ -90,21 +91,22 @@ omit = [ ] [tool.ruff] -select = ["E", "F", "T100", "T201", "T203"] +lint.select = ["E", "F", "T100", "T201", "T203"] exclude = [ ".git", ".tox", "__pycache__", "build/", "dist/", - "src/nwb-schema", + "src/pynwb/nwb-schema", "docs/source/conf.py", + "docs/notebooks/*", "src/pynwb/_due.py", "test.py" # remove when pytest comes along ] line-length = 120 -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "tests/read_dandi/*" = ["T201"] "docs/gallery/*" = ["E402", "T201"] "src/*/__init__.py" = ["F401"] @@ -114,6 +116,6 @@ line-length = 120 # "test_gallery.py" = ["T201"] # Uncomment when test_gallery.py is created -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 17 diff --git a/requirements-doc.txt b/requirements-doc.txt index 90633ce88..30c41106d 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -13,4 +13,7 @@ lxml # used by dataframe_image when using the matplotlib backend hdf5plugin dandi>=0.46.6 hdmf-zarr -zarr<2.18.0 # limited until hdmf-zarr 0.8.0 is released to resolve issues with zarr>=2.18.0 +zarr<3 # limited to zarr<3 until hdmf-zarr resolves issues with zarr 3.0 +linkml-runtime==1.7.4; python_version >= "3.9" +schemasheets==0.2.1; python_version >= "3.9" +oaklib==0.5.32; python_version >= "3.9" \ No newline at end of file diff --git a/requirements-min.txt b/requirements-min.txt index a047d81c7..eef051b25 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,6 +1,6 @@ # minimum versions of package dependencies for installing PyNWB h5py==2.10 # support for selection of datasets with list of indices added in 2.10 -hdmf==3.14.0 +hdmf==3.14.3 numpy==1.18 pandas==1.1.5 python-dateutil==2.7.3 diff --git a/requirements.txt b/requirements.txt index 5b3c49ded..6d7a17623 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ # pinned dependencies to reproduce an entire development 
environment to use PyNWB h5py==3.11.0 -hdmf==3.14.0 -numpy==1.26.4 +hdmf==3.14.3 +numpy==2.1.1; python_version > "3.9" # numpy 2.1+ is not compatible with py3.9 +numpy==2.0.2; python_version == "3.9" pandas==2.2.2 python-dateutil==2.9.0.post0 diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 50db92dcc..1d109abe3 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -4,6 +4,9 @@ import os.path from pathlib import Path from copy import deepcopy +import subprocess +import pickle +from warnings import warn import h5py from hdmf.spec import NamespaceCatalog @@ -22,6 +25,16 @@ from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace # noqa E402 from .validate import validate # noqa: F401, E402 +try: + # see https://effigies.gitlab.io/posts/python-packaging-2023/ + from ._version import __version__ +except ImportError: # pragma: no cover + # this is a relatively slower method for getting the version string + from importlib.metadata import version # noqa: E402 + + __version__ = version("pynwb") + del version + @docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'}, {'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None}, @@ -50,7 +63,7 @@ def unload_type_config(**kwargs): type_map = kwargs['type_map'] or get_type_map() hdmf_unload_type_config(type_map=type_map) -def __get_resources(): +def __get_resources() -> dict: try: from importlib.resources import files except ImportError: @@ -60,27 +73,35 @@ def __get_resources(): __location_of_this_file = files(__name__) __core_ns_file_name = 'nwb.namespace.yaml' __schema_dir = 'nwb-schema/core' + cached_core_typemap = __location_of_this_file / 'core_typemap.pkl' + cached_version_indicator = __location_of_this_file / '.core_typemap_version' ret = dict() ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name) + ret['cached_typemap_path'] = str(cached_core_typemap) + ret['cached_version_indicator'] = str(cached_version_indicator) return ret def _get_resources(): # LEGACY: Needed to support legacy implementation. + # TODO: Remove this in PyNWB 3.0. + warn("The function '_get_resources' is deprecated and will be removed in a future release.", DeprecationWarning) return __get_resources() -# a global namespace catalog -global __NS_CATALOG +# a global type map global __TYPE_MAP -__NS_CATALOG = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) +__ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) hdmf_typemap = hdmf.common.get_type_map() -__TYPE_MAP = TypeMap(__NS_CATALOG) +__TYPE_MAP = TypeMap(__ns_catalog) __TYPE_MAP.merge(hdmf_typemap, ns_catalog=True) +# load the core namespace, i.e. base NWB specification +__resources = __get_resources() + @docval({'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps', @@ -138,22 +159,95 @@ def load_namespaces(**kwargs): namespace_path = getargs('namespace_path', kwargs) return __TYPE_MAP.load_namespaces(namespace_path) +def available_namespaces(): + """Returns all namespaces registered in the namespace catalog""" + return __TYPE_MAP.namespace_catalog.namespaces -# load the core namespace, i.e. base NWB specification -__resources = __get_resources() -if os.path.exists(__resources['namespace_path']): - load_namespaces(__resources['namespace_path']) -else: - raise RuntimeError( - "'core' is not a registered namespace. 
If you installed PyNWB locally using a git clone, you need to " - "use the --recurse_submodules flag when cloning. See developer installation instructions here: " - "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository" - ) +def __git_cmd(*args) -> subprocess.CompletedProcess: + """ + Call git with the package as the directory regardless of cwd. -def available_namespaces(): - """Returns all namespaces registered in the namespace catalog""" - return __NS_CATALOG.namespaces + Since any folder within a git repo works, don't try to ascend to the top, since + if we're *not* actually in a git repo we're only guaranteed to know about + the inner `pynwb` directory. + """ + parent_dir = str(Path(__file__).parent) + result = subprocess.run(["git", "-C", parent_dir, *args], capture_output=True) + return result + + +def __clone_submodules(): + if __git_cmd('rev-parse').returncode == 0: + warn( + 'NWB core schema not found in cloned installation, initializing submodules...', + stacklevel=1) + res = __git_cmd('submodule', 'update', '--init', '--recursive') + if not res.returncode == 0: # pragma: no cover + raise RuntimeError( + 'Exception while initializing submodules, got:\n' + 'stdout:\n' + ('-'*20) + res.stdout + "\nstderr:\n" + ('-'*20) + res.stderr) + else: # pragma: no cover + raise RuntimeError("Package is not installed from a git repository, can't clone submodules") + + +def __load_core_namespace(final:bool=False): + """ + Load the core namespace into __TYPE_MAP, + either by loading a pickled version or creating one anew and pickling it. + + We keep a dotfile next to it that tracks what version of pynwb created it, + so that we invalidate it when the code changes. + + Args: + final (bool): This function tries again if the submodules aren't cloned, + but it shouldn't go into an infinite loop. + If final is ``True``, don't recurse. + """ + global __TYPE_MAP + global __resources + + # if we have a version indicator file and it doesn't match the current version, + # scrap the cached typemap + if os.path.exists(__resources['cached_version_indicator']): + with open(__resources['cached_version_indicator'], 'r') as f: + cached_version = f.read().strip() + if cached_version != __version__: + Path(__resources['cached_typemap_path']).unlink(missing_ok=True) + else: + # remove any cached typemap, forcing re-creation + Path(__resources['cached_typemap_path']).unlink(missing_ok=True) + + # load pickled typemap if we have one + if os.path.exists(__resources['cached_typemap_path']): + with open(__resources['cached_typemap_path'], 'rb') as f: + __TYPE_MAP = pickle.load(f) # type: TypeMap + + # otherwise make a new one and cache it + elif os.path.exists(__resources['namespace_path']): + load_namespaces(__resources['namespace_path']) + with open(__resources['cached_typemap_path'], 'wb') as f: + pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL) + with open(__resources['cached_version_indicator'], 'w') as f: + f.write(__version__) + + # otherwise, we don't have the schema and try and initialize from submodules, + # afterwards trying to load the namespace again + else: + try: + __clone_submodules() + except (FileNotFoundError, OSError, RuntimeError) as e: # pragma: no cover + if 'core' not in available_namespaces(): + warn( + "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, " + "you need to use the --recurse_submodules flag when cloning. 
" + "See developer installation instructions here: " + "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository\n" + f"Got exception: \n{e}" + ) + if not final: + __load_core_namespace(final=True) +__load_core_namespace() # a function to register a container classes with the global map @@ -269,7 +363,15 @@ def can_read(path: str): return False try: with h5py.File(path, "r") as file: # path is HDF5 file - return get_nwbfile_version(file)[1][0] >= 2 # Major version of NWB >= 2 + version_info = get_nwbfile_version(file) + if version_info[0] is None: + warn("Cannot read because missing NWB version in the HDF5 file. The file is not a valid NWB file.") + return False + elif version_info[1][0] < 2: # Major versions of NWB < 2 not supported + warn("Cannot read because PyNWB supports NWB files version 2 and above.") + return False + else: + return True except IOError: return False @@ -359,7 +461,9 @@ def read(self, **kwargs): 'default': None}, {'name': 'write_args', 'type': dict, 'doc': 'arguments to pass to :py:meth:`~hdmf.backends.io.HDMFIO.write_builder`', - 'default': None}) + 'default': None}, + {'name': 'cache_spec', 'type': bool, 'doc': 'whether to cache the specification to file', + 'default': True}) def export(self, **kwargs): """ Export an NWB file to a new NWB file using the HDF5 backend. @@ -416,15 +520,7 @@ def export(self, **kwargs): from hdmf.data_utils import DataChunkIterator # noqa: F401,E402 from hdmf.backends.hdf5 import H5DataIO # noqa: F401,E402 -try: - # see https://effigies.gitlab.io/posts/python-packaging-2023/ - from ._version import __version__ -except ImportError: # pragma: no cover - # this is a relatively slower method for getting the version string - from importlib.metadata import version # noqa: E402 - __version__ = version("pynwb") - del version from ._due import due, BibTeX # noqa: E402 due.cite( diff --git a/src/pynwb/file.py b/src/pynwb/file.py index dc7321c61..a447c126d 100644 --- a/src/pynwb/file.py +++ b/src/pynwb/file.py @@ -274,7 +274,6 @@ class NWBFile(MultiContainerInterface, HERDManager): {'name': 'subject', 'child': True, 'required_name': 'subject'}, {'name': 'sweep_table', 'child': True, 'required_name': 'sweep_table'}, {'name': 'invalid_times', 'child': True, 'required_name': 'invalid_times'}, - 'epoch_tags', # icephys_filtering is temporary. 
/intracellular_ephys/filtering dataset will be deprecated {'name': 'icephys_filtering', 'settable': False}, {'name': 'intracellular_recordings', 'child': True, @@ -365,8 +364,6 @@ class NWBFile(MultiContainerInterface, HERDManager): 'doc': 'Stimulus template TimeSeries objects belonging to this NWBFile', 'default': None}, {'name': 'epochs', 'type': TimeIntervals, 'doc': 'Epoch objects belonging to this NWBFile', 'default': None}, - {'name': 'epoch_tags', 'type': (tuple, list, set), - 'doc': 'A sorted list of tags used across all epochs', 'default': set()}, {'name': 'trials', 'type': TimeIntervals, 'doc': 'A table containing trial data', 'default': None}, {'name': 'invalid_times', 'type': TimeIntervals, @@ -429,7 +426,6 @@ def __init__(self, **kwargs): 'stimulus_template', 'keywords', 'processing', - 'epoch_tags', 'electrodes', 'electrode_groups', 'devices', @@ -559,6 +555,10 @@ def modules(self): warn("NWBFile.modules has been replaced by NWBFile.processing.", DeprecationWarning) return self.processing + @property + def epoch_tags(self): + return set(self.epochs.tags[:]) if self.epochs is not None else set() + @property def ec_electrode_groups(self): warn("NWBFile.ec_electrode_groups has been replaced by NWBFile.electrode_groups.", DeprecationWarning) @@ -620,7 +620,6 @@ def add_epoch_column(self, **kwargs): See :py:meth:`~hdmf.common.table.DynamicTable.add_column` for more details """ self.__check_epochs() - self.epoch_tags.update(kwargs.pop('tags', list())) self.epochs.add_column(**kwargs) def add_epoch_metadata_column(self, *args, **kwargs): @@ -642,8 +641,6 @@ def add_epoch(self, **kwargs): enclosure versus sleeping between explorations) """ self.__check_epochs() - if kwargs['tags'] is not None: - self.epoch_tags.update(kwargs['tags']) self.epochs.add_interval(**kwargs) def __check_electrodes(self): diff --git a/src/pynwb/io/file.py b/src/pynwb/io/file.py index c8bdbf56e..53d257a05 100644 --- a/src/pynwb/io/file.py +++ b/src/pynwb/io/file.py @@ -128,6 +128,15 @@ def __init__(self, spec): @ObjectMapper.object_attr('scratch_datas') def scratch_datas(self, container, manager): + """Set the value for the 'scratch_datas' spec on NWBFile to a list of ScratchData objects. + + Used when writing (building) the NWBFile container to a file. + + The 'scratch' group can contain both groups and datasets. This mapping function + is used when writing the value for the 'scratch_datas' spec (ScratchData type + -- see __init__ above). The value is set to a list of all ScratchData + objects in the 'scratch' field of the NWBFile container. + """ scratch = container.scratch ret = list() for s in scratch.values(): @@ -137,6 +146,15 @@ def scratch_datas(self, container, manager): @ObjectMapper.object_attr('scratch_containers') def scratch_containers(self, container, manager): + """Set the value for the 'scratch_containers' spec on NWBFile to a list of non-ScratchData objects. + + Used when writing (building) the NWBFile container to a file. + + The 'scratch' group can contain both groups and datasets. This mapping function + is used when writing the value for the 'scratch_containers' spec (NWBContainers + and DynamicTable type -- see __init__ above). The value is set to a list of all non-ScratchData + objects in the 'scratch' field of the NWBFile container. 
+ """ scratch = container.scratch ret = list() for s in scratch.values(): @@ -146,6 +164,14 @@ def scratch_containers(self, container, manager): @ObjectMapper.constructor_arg('scratch') def scratch(self, builder, manager): + """Set the constructor arg for 'scratch' to a tuple of objects. + + Used when constructing the NWBFile container from a written file. + + The 'scratch' group can contain both groups and datasets. This mapping function + is used to construct the contained groups and datasets and put them into a single + field 'scratch' on the NWBFile container for user convenience. + """ scratch = builder.get('scratch') ret = list() if scratch is not None: @@ -157,28 +183,54 @@ def scratch(self, builder, manager): @ObjectMapper.constructor_arg('session_start_time') def dateconversion(self, builder, manager): + """Set the constructor arg for 'session_start_time' to a datetime object. + + Used when constructing the NWBFile container from a written file. + + Dates are read into builders as strings and are parsed into datetime objects + for user convenience and consistency with how they are written. + """ datestr = builder.get('session_start_time').data date = dateutil_parse(datestr) return date @ObjectMapper.constructor_arg('timestamps_reference_time') def dateconversion_trt(self, builder, manager): + """Set the constructor arg for 'timestamps_reference_time' to a datetime object. + + Used when constructing the NWBFile container from a written file. + + Dates are read into builders as strings and are parsed into datetime objects + for user convenience and consistency with how they are written. + """ datestr = builder.get('timestamps_reference_time').data date = dateutil_parse(datestr) return date @ObjectMapper.constructor_arg('file_create_date') def dateconversion_list(self, builder, manager): + """Set the constructor arg for 'file_create_date' to a datetime object. + + Used when constructing the NWBFile container from a written file. + + Dates are read into builders as strings and are parsed into datetime objects + for user convenience and consistency with how they are written. + """ datestr = builder.get('file_create_date').data dates = list(map(dateutil_parse, datestr)) return dates - @ObjectMapper.constructor_arg('file_name') - def name(self, builder, manager): - return builder.name - @ObjectMapper.constructor_arg('experimenter') def experimenter_carg(self, builder, manager): + """Set the constructor arg for 'experimenter' to a tuple if the builder value is a string. + + Used when constructing the NWBFile container from a written file. + + In early versions of the NWB 2 schema, 'experimenter' was specified as a string. + Then it was changed to be a 1-D array of strings. This mapping function is necessary + to allow reading of both data where 'experimenter' was specified as a string and data + where 'experimenter' was specified as an array. + """ ret = None exp_bldr = builder['general'].get('experimenter') if exp_bldr is not None: @@ -190,6 +242,14 @@ def experimenter_carg(self, builder, manager): @ObjectMapper.object_attr('experimenter') def experimenter_obj_attr(self, container, manager): + """Change the value for the field 'experimenter' on NWBFile to a tuple if it is a string. + + Used when writing (building) the NWBFile container to a file. + + In early versions of the NWB 2 schema, 'experimenter' was specified as a string. + Then it was changed to be a 1-D array of strings. 
This mapping function is necessary + for writing a valid 'experimenter' array if it is a string in the NWBFile container. + """ ret = None if isinstance(container.experimenter, str): ret = (container.experimenter,) @@ -197,6 +257,15 @@ def experimenter_obj_attr(self, container, manager): @ObjectMapper.constructor_arg('related_publications') def publications_carg(self, builder, manager): + """Set the constructor arg for 'related_publications' to a tuple if the builder value is a string. + + Used when constructing the NWBFile container from a written file. + + In early versions of the NWB 2 schema, 'related_publications' was specified as a string. + Then it was changed to be a 1-D array of strings. This mapping function is necessary + to allow reading of both data where 'related_publications' was specified as a string and data + where 'related_publications' was specified as an array. + """ ret = None pubs_bldr = builder['general'].get('related_publications') if pubs_bldr is not None: @@ -208,6 +277,14 @@ def publications_carg(self, builder, manager): @ObjectMapper.object_attr('related_publications') def publication_obj_attr(self, container, manager): + """Change the value for the field 'related_publications' on NWBFile to a tuple if it is a string. + + Used when writing (building) the NWBFile container to a file. + + In early versions of the NWB 2 schema, 'related_publications' was specified as a string. + Then it was changed to be a 1-D array of strings. This mapping function is necessary + for writing a valid 'related_publications' array if it is a string in the NWBFile container. + """ ret = None if isinstance(container.related_publications, str): ret = (container.related_publications,) @@ -219,6 +296,13 @@ class SubjectMap(ObjectMapper): @ObjectMapper.constructor_arg('date_of_birth') def dateconversion(self, builder, manager): + """Set the constructor arg for 'date_of_birth' to a datetime object. + + Used when constructing the Subject container from a written file. + + Dates are read into builders as strings and are parsed into datetime objects + for user convenience and consistency with how they are written. + """ dob_builder = builder.get('date_of_birth') if dob_builder is None: return @@ -229,6 +313,18 @@ def dateconversion(self, builder, manager): @ObjectMapper.constructor_arg("age__reference") def age_reference_none(self, builder, manager): + """Set the constructor arg for 'age__reference' to "unspecified" for NWB files < 2.6, else "birth". + + Used when constructing the Subject container from a written file. + + NWB schema 2.6.0 introduced a new optional attribute 'reference' on the 'age' dataset with a default + value of "birth". When data written with NWB versions < 2.6 are read, 'age__reference' is set to + "unspecified" in the Subject constructor. "unspecified" is a special non-None placeholder value + that is handled specially in Subject.__init__ to distinguish it from no value being provided by the + user. When data written with NWB versions >= 2.6 are read, 'age__reference' is set to the default + value, "birth", in the Subject constructor (this is not strictly necessary because Subject.__init__ + has default value "birth" for 'age__reference'). 
+ """ age_builder = builder.get("age") age_reference = None if age_builder is not None: diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index aecfb2556..880f860a6 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -29,7 +29,7 @@ def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: return validator.validate(builder) -def _get_cached_namespaces_to_validate( +def get_cached_namespaces_to_validate( path: str, driver: Optional[str] = None, aws_region: Optional[str] = None, ) -> Tuple[List[str], BuildManager, Dict[str, str]]: """ @@ -39,14 +39,18 @@ def _get_cached_namespaces_to_validate( ------- The following example illustrates how we can use this function to validate against namespaces cached in a file. This is useful, e.g., when a file was created using an extension - >>> from pynwb import validate - >>> from pynwb.validate import _get_cached_namespaces_to_validate - >>> path = "my_nwb_file.nwb" - >>> validate_namespaces, manager, cached_namespaces = _get_cached_namespaces_to_validate(path) - >>> with NWBHDF5IO(path, "r", manager=manager) as reader: - >>> errors = [] - >>> for ns in validate_namespaces: - >>> errors += validate(io=reader, namespace=ns) + + .. code-block:: python + + from pynwb import validate + from pynwb.validate import get_cached_namespaces_to_validate + path = "my_nwb_file.nwb" + validate_namespaces, manager, cached_namespaces = get_cached_namespaces_to_validate(path) + with NWBHDF5IO(path, "r", manager=manager) as reader: + errors = [] + for ns in validate_namespaces: + errors += validate(io=reader, namespace=ns) + :param path: Path for the NWB file :return: Tuple with: - List of strings with the most specific namespace(s) to use for validation. @@ -149,7 +153,7 @@ def validate(**kwargs): io_kwargs = dict(path=path, mode="r", driver=driver) if use_cached_namespaces: - cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate( + cached_namespaces, manager, namespace_dependencies = get_cached_namespaces_to_validate( path=path, driver=driver ) io_kwargs.update(manager=manager) @@ -231,7 +235,7 @@ def validate_cli(): if args.list_namespaces: for path in args.paths: - cached_namespaces, _, _ = _get_cached_namespaces_to_validate(path=path) + cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=path) print("\n".join(cached_namespaces)) else: validation_errors, validation_status = validate( diff --git a/test.py b/test.py index 5bddb7c7d..f64fcd75d 100644 --- a/test.py +++ b/test.py @@ -3,6 +3,7 @@ import re import argparse import glob +import h5py import inspect import logging import os.path @@ -152,7 +153,11 @@ def validate_nwbs(): logging.info('running validation tests on NWB files') examples_nwbs = glob.glob('*.nwb') + # exclude files downloaded from dandi, validation of those files is handled by dandisets-health-status checks + examples_nwbs = [x for x in examples_nwbs if not x.startswith('sub-')] + import pynwb + from pynwb.validate import get_cached_namespaces_to_validate for nwb in examples_nwbs: try: @@ -161,27 +166,36 @@ def validate_nwbs(): ws = list() with warnings.catch_warnings(record=True) as tmp: logging.info("Validating with pynwb.validate method.") - with pynwb.NWBHDF5IO(nwb, mode='r') as io: - errors = pynwb.validate(io) - TOTAL += 1 - - if errors: - FAILURES += 1 - ERRORS += 1 - for err in errors: - print("Error: %s" % err) - - def get_namespaces(nwbfile): - comp = run(["python", "-m", "pynwb.validate", - "--list-namespaces", nwbfile], - stdout=PIPE, stderr=STDOUT, 
universal_newlines=True, timeout=30) - - if comp.returncode != 0: - return [] + is_family_nwb_file = False + try: + with pynwb.NWBHDF5IO(nwb, mode='r') as io: + errors = pynwb.validate(io) + except OSError as e: + # if the file was created with the family driver, need to use the family driver to open it + if 'family driver should be used' in str(e): + is_family_nwb_file = True + match = re.search(r'(\d+)', nwb) + filename_pattern = nwb[:match.start()] + '%d' + nwb[match.end():] # infer the filename pattern + memb_size = 1024**2 # note: the memb_size must be the same as the one used to create the file + with h5py.File(filename_pattern, mode='r', driver='family', memb_size=memb_size) as f: + with pynwb.NWBHDF5IO(file=f, manager=None, mode='r') as io: + errors = pynwb.validate(io) + else: + raise e + + TOTAL += 1 + + if errors: + FAILURES += 1 + ERRORS += 1 + for err in errors: + print("Error: %s" % err) - return comp.stdout.split() + # if file was created with family driver, skip pynwb.validate CLI because not yet supported + if is_family_nwb_file: + continue - namespaces = get_namespaces(nwb) + namespaces, _, _ = get_cached_namespaces_to_validate(nwb) if len(namespaces) == 0: FAILURES += 1 diff --git a/tests/back_compat/test_import_structure.py b/tests/back_compat/test_import_structure.py index 36831929d..81c4acf90 100644 --- a/tests/back_compat/test_import_structure.py +++ b/tests/back_compat/test_import_structure.py @@ -30,19 +30,14 @@ def test_outer_import_structure(self): "TimeSeries", "TypeMap", "_HDF5IO", - "__NS_CATALOG", - "__TYPE_MAP", "__builtins__", "__cached__", "__doc__", "__file__", - "__get_resources", - "__io", "__loader__", "__name__", "__package__", "__path__", - "__resources", "__spec__", "__version__", "_due", diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index d68334c89..e1ce4269b 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -531,3 +531,41 @@ def test_round_trip_with_pathlib_path(self): with NWBHDF5IO(pathlib_path, 'r') as io: read_file = io.read() self.assertContainerEqual(read_file, self.nwbfile) + + def test_can_read_current_nwb_file(self): + with NWBHDF5IO(self.path, 'w') as io: + io.write(self.nwbfile) + self.assertTrue(NWBHDF5IO.can_read(self.path)) + + def test_can_read_file_does_not_exits(self): + self.assertFalse(NWBHDF5IO.can_read('not_a_file.nwb')) + + def test_can_read_file_no_version(self): + # write the example file + with NWBHDF5IO(self.path, 'w') as io: + io.write(self.nwbfile) + # remove the version attribute + with File(self.path, mode='a') as io: + del io.attrs['nwb_version'] + + # assert can_read returns False and warning raised + warn_msg = "Cannot read because missing NWB version in the HDF5 file. The file is not a valid NWB file." + with self.assertWarnsWith(UserWarning, warn_msg): + self.assertFalse(NWBHDF5IO.can_read(self.path)) + + def test_can_read_file_old_version(self): + # write the example file + with NWBHDF5IO(self.path, 'w') as io: + io.write(self.nwbfile) + # set the version attribute <2.0 + with File(self.path, mode='a') as io: + io.attrs['nwb_version'] = "1.0.5" + + # assert can_read returns False and warning raised + warn_msg = "Cannot read because PyNWB supports NWB files version 2 and above." 
+ with self.assertWarnsWith(UserWarning, warn_msg): + self.assertFalse(NWBHDF5IO.can_read(self.path)) + + def test_can_read_file_invalid_hdf5_file(self): + # current file is not an HDF5 file + self.assertFalse(NWBHDF5IO.can_read(__file__)) diff --git a/tests/integration/ros3/test_ros3.py b/tests/integration/ros3/test_ros3.py index 95a891760..2571e6199 100644 --- a/tests/integration/ros3/test_ros3.py +++ b/tests/integration/ros3/test_ros3.py @@ -1,6 +1,6 @@ from pynwb import NWBHDF5IO from pynwb import validate -from pynwb.validate import _get_cached_namespaces_to_validate +from pynwb.validate import get_cached_namespaces_to_validate from pynwb.testing import TestCase import urllib.request import h5py @@ -85,7 +85,7 @@ def test_dandi_get_cached_namespaces(self): ) } } - found_namespaces, _, found_namespace_dependencies = _get_cached_namespaces_to_validate( + found_namespaces, _, found_namespace_dependencies = get_cached_namespaces_to_validate( path=self.s3_test_path, driver="ros3" ) diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index e2a060d20..5a564f975 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -135,10 +135,6 @@ def test_print_file(self): name1 , name2 } - epoch_tags: { - tag1, - tag2 - } epochs: epochs file_create_date: \[datetime.datetime\(.*\)\] identifier: identifier diff --git a/tests/unit/test_ecephys.py b/tests/unit/test_ecephys.py index 1ef0b7880..dc194af2a 100644 --- a/tests/unit/test_ecephys.py +++ b/tests/unit/test_ecephys.py @@ -118,7 +118,7 @@ def test_dimensions_warning(self): def test_get_data_in_units(self): samples = 100 - channels = 2 + channels = 5 conversion = 10.0 offset = 3.0 channel_conversion = np.random.rand(channels) diff --git a/tests/unit/test_file.py b/tests/unit/test_file.py index db24c17f3..23f993b02 100644 --- a/tests/unit/test_file.py +++ b/tests/unit/test_file.py @@ -144,6 +144,18 @@ def test_epoch_tags(self): tags = self.nwbfile.epoch_tags self.assertEqual(set(expected_tags), set(tags)) + def test_epoch_tags_single_string(self): + tags1 = 't1' + tags2 = 't2' + expected_tags = set([tags1, tags2]) + self.nwbfile.add_epoch(0.0, 1.0, tags=tags1) + self.nwbfile.add_epoch(1.0, 2.0, tags=tags2) + tags = self.nwbfile.epoch_tags + self.assertEqual(expected_tags, tags) + + def test_epoch_tags_no_table(self): + self.assertEqual(set(), self.nwbfile.epoch_tags) + def test_add_acquisition(self): self.nwbfile.add_acquisition(TimeSeries('test_ts', [0, 1, 2, 3, 4, 5], 'grams', timestamps=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5]))
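The epoch_tags tests added above pair with the new ``epoch_tags`` property in ``src/pynwb/file.py``; as a small usage sketch (not part of the diff), the tags are now derived from the epochs table on access rather than tracked in a separate constructor argument:

from datetime import datetime
from uuid import uuid4

from pynwb import NWBFile

nwbfile = NWBFile(
    session_description="epoch_tags property demo",
    identifier=str(uuid4()),
    session_start_time=datetime.now().astimezone(),
)

print(nwbfile.epoch_tags)  # set() -- no epochs table exists yet

nwbfile.add_epoch(start_time=0.0, stop_time=1.0, tags=["rest"])
nwbfile.add_epoch(start_time=1.0, stop_time=2.0, tags=["run", "rest"])

print(nwbfile.epoch_tags)  # {'rest', 'run'} -- computed from nwbfile.epochs.tags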