diff --git a/CHANGELOG.md b/CHANGELOG.md index 7483693a9..a1804a023 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## PyNWB 2.6.0 (Upcoming) ### Enhancements and minor changes +- For `NWBHDF5IO()`, change the default of arg `load_namespaces` from `False` to `True`. @bendichter [#1748](https://github.com/NeurodataWithoutBorders/pynwb/pull/1748) - Add `NWBHDF5IO.can_read()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) diff --git a/docs/gallery/advanced_io/linking_data.py b/docs/gallery/advanced_io/linking_data.py index 082aa3c51..82824f6cd 100644 --- a/docs/gallery/advanced_io/linking_data.py +++ b/docs/gallery/advanced_io/linking_data.py @@ -6,57 +6,50 @@ PyNWB supports linking between files using external links. -""" +Example Use Case: Integrating data from multiple files +--------------------------------------------------------- -#################### -# Example Use Case: Integrating data from multiple files -# --------------------------------------------------------- -# -# NBWContainer classes (e.g., :py:class:`~pynwb.base.TimeSeries`) support the integration of data stored in external -# HDF5 files with NWB data files via external links. To make things more concrete, let's look at the following use -# case. We want to simultaneously record multiple data streams during data acquisition. Using the concept of external -# links allows us to save each data stream to an external HDF5 files during data acquisition and to -# afterwards link the data into a single NWB:N file. In this case, each recording becomes represented by a -# separate file-system object that can be set as read-only once the experiment is done. In the following -# we are using :py:meth:`~pynwb.base.TimeSeries` as an example, but the same approach works for other -# NWBContainers as well. 
-# +NWBContainer classes (e.g., :py:class:`~pynwb.base.TimeSeries`) support the integration of data stored in external +HDF5 files with NWB data files via external links. To make things more concrete, let's look at the following use +case. We want to simultaneously record multiple data streams during data acquisition. Using the concept of external +links allows us to save each data stream to an external HDF5 file during data acquisition and to +afterwards link the data into a single NWB file. In this case, each recording becomes represented by a +separate file-system object that can be set as read-only once the experiment is done. In the following +we are using :py:meth:`~pynwb.base.TimeSeries` as an example, but the same approach works for other +NWBContainers as well. -#################### -# .. tip:: -# -# The same strategies we use here for creating External Links also apply to Soft Links. -# The main difference between soft and external links is that soft links point to other -# objects within the same file while external links point to objects in external files. -# +.. tip:: -#################### -# .. tip:: -# -# In the case of :py:meth:`~pynwb.base.TimeSeries`, the uncorrected timestamps generated by the acquisition -# system can be stored (or linked) in the *sync* group. In the NWB:N format, hardware-recorded time data -# must then be corrected to a common time base (e.g., timestamps from all hardware sources aligned) before -# it can be included in the *timestamps* of the *TimeSeries*. This means, in the case -# of :py:meth:`~pynwb.base.TimeSeries` we need to be careful that we are not including data with incompatible -# timestamps in the same file when using external links. -# + The same strategies we use here for creating External Links also apply to Soft Links. + The main difference between soft and external links is that soft links point to other + objects within the same file while external links point to objects in external files. 
-#################### -# .. warning:: -# -# External links can become stale/break. Since external links are pointing to data in other files -# external links may become invalid any time files are modified on the file system, e.g., renamed, -# moved or access permissions are changed. -# +.. tip:: -#################### -# Creating test data -# --------------------------- -# -# In the following we are creating two :py:meth:`~pynwb.base.TimeSeries` each written to a separate file. -# We then show how we can integrate these files into a single NWBFile. + In the case of :py:meth:`~pynwb.base.TimeSeries`, the uncorrected timestamps generated by the acquisition + system can be stored (or linked) in the *sync* group. In the NWB format, hardware-recorded time data + must then be corrected to a common time base (e.g., timestamps from all hardware sources aligned) before + it can be included in the *timestamps* of the *TimeSeries*. This means, in the case + of :py:meth:`~pynwb.base.TimeSeries` we need to be careful that we are not including data with incompatible + timestamps in the same file when using external links. + + +.. warning:: + + External links can become stale/break. Since external links are pointing to data in other files + external links may become invalid any time files are modified on the file system, e.g., renamed, + moved or access permissions are changed. + + +Creating test data +--------------------------- + +In the following we are creating two :py:meth:`~pynwb.base.TimeSeries` each written to a separate file. +We then show how we can integrate these files into a single NWBFile. 
+""" # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnails_linking_data.png' + from datetime import datetime from uuid import uuid4 diff --git a/docs/gallery/general/extensions.py b/docs/gallery/general/extensions.py index fa4f4cbb7..5140c531b 100644 --- a/docs/gallery/general/extensions.py +++ b/docs/gallery/general/extensions.py @@ -254,7 +254,7 @@ def __init__(self, **kwargs): # explicitly specify this. This behavior is enabled by the *load_namespaces* # argument to the :py:class:`~pynwb.NWBHDF5IO` constructor. -with NWBHDF5IO("cache_spec_example.nwb", mode="r", load_namespaces=True) as io: +with NWBHDF5IO("cache_spec_example.nwb", mode="r") as io: nwbfile = io.read() #################### diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 710f55ee8..7cf32e074 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -4,7 +4,6 @@ import os.path from pathlib import Path from copy import deepcopy -from warnings import warn import h5py from hdmf.spec import NamespaceCatalog @@ -244,8 +243,9 @@ def can_read(path: str): 'doc': 'the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x")', 'default': 'r'}, {'name': 'load_namespaces', 'type': bool, - 'doc': 'whether or not to load cached namespaces from given path - not applicable in write mode', - 'default': False}, + 'doc': ('whether or not to load cached namespaces from given path - not applicable in write mode ' + 'or when `manager` is not None or when `extensions` is not None'), + 'default': True}, {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps', @@ -261,15 +261,10 @@ def __init__(self, **kwargs): popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'file', 'comm', 'driver', 'herd_path', kwargs) # Define the BuildManager to use - if load_namespaces: - if 
manager is not None: - warn("loading namespaces from file - ignoring 'manager'") - if extensions is not None: - warn("loading namespaces from file - ignoring 'extensions' argument") - # namespaces are not loaded when creating an NWBHDF5IO object in write mode - if 'w' in mode or mode == 'x': - raise ValueError("cannot load namespaces from file when writing to it") + if mode in ('w', 'w-', 'x') or manager is not None or extensions is not None: # file-creating modes + load_namespaces = False + if load_namespaces: tm = get_type_map() super().load_namespaces(tm, path, file=file_obj, driver=driver) manager = BuildManager(tm) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 23b3aee6f..62aa41426 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -156,6 +156,7 @@ def validate(**kwargs): file=sys.stderr, ) else: + io_kwargs.update(load_namespaces=False) namespaces_to_validate = [CORE_NAMESPACE] if namespace is not None: diff --git a/tests/back_compat/test_import_structure.py b/tests/back_compat/test_import_structure.py index e5f931f5d..79d4f6ad0 100644 --- a/tests/back_compat/test_import_structure.py +++ b/tests/back_compat/test_import_structure.py @@ -82,7 +82,6 @@ def test_outer_import_structure(self): "spec", "testing", "validate", - "warn", ] for member in expected_structure: self.assertIn(member=member, container=current_structure) diff --git a/tests/back_compat/test_read.py b/tests/back_compat/test_read.py index 919ae6bde..792d26e7a 100644 --- a/tests/back_compat/test_read.py +++ b/tests/back_compat/test_read.py @@ -31,6 +31,16 @@ class TestReadOldVersions(TestCase): "- expected an array of shape '[None]', got non-array data 'one publication'")], } + def get_io(self, path): + """Get an NWBHDF5IO object for the given path.""" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + return NWBHDF5IO(str(path), 'r') + def test_read(self): """Test reading and validating all NWB files in 
the same folder as this file. @@ -43,7 +53,7 @@ def test_read(self): with self.subTest(file=f.name): with warnings.catch_warnings(record=True) as warnings_on_read: warnings.simplefilter("always") - with NWBHDF5IO(str(f), 'r', load_namespaces=True) as io: + with self.get_io(f) as io: errors = validate(io) io.read() for w in warnings_on_read: @@ -69,28 +79,28 @@ def test_read(self): def test_read_timeseries_no_data(self): """Test that a TimeSeries written without data is read with data set to the default value.""" f = Path(__file__).parent / '1.5.1_timeseries_no_data.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_timeseries'].data, TimeSeries.DEFAULT_DATA) def test_read_timeseries_no_unit(self): """Test that an ImageSeries written without unit is read with unit set to the default value.""" f = Path(__file__).parent / '1.5.1_timeseries_no_unit.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_timeseries'].unit, TimeSeries.DEFAULT_UNIT) def test_read_imageseries_no_data(self): """Test that an ImageSeries written without data is read with data set to the default value.""" f = Path(__file__).parent / '1.5.1_imageseries_no_data.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_imageseries'].data, ImageSeries.DEFAULT_DATA) def test_read_imageseries_no_unit(self): """Test that an ImageSeries written without unit is read with unit set to the default value.""" f = Path(__file__).parent / '1.5.1_imageseries_no_unit.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_imageseries'].unit, ImageSeries.DEFAULT_UNIT) @@ -100,7 +110,7 @@ def 
test_read_imageseries_non_external_format(self): f = Path(__file__).parent / fbase expected_warning = self.expected_warnings[fbase][0] with self.assertWarnsWith(UserWarning, expected_warning): - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_imageseries'].format, "tiff") @@ -110,13 +120,13 @@ def test_read_imageseries_nonmatch_starting_frame(self): f = Path(__file__).parent / fbase expected_warning = self.expected_warnings[fbase][0] with self.assertWarnsWith(UserWarning, expected_warning): - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_imageseries'].starting_frame, [1, 2, 3]) def test_read_subject_no_age__reference(self): """Test that reading a Subject without an age__reference set with NWB schema 2.5.0 sets the value to None""" f = Path(__file__).parent / '2.2.0_subject_no_age__reference.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertIsNone(read_nwbfile.subject.age__reference) diff --git a/tests/read_dandi/test_read_dandi.py b/tests/read_dandi/test_read_dandi.py index 0e0698d77..f9dafd938 100644 --- a/tests/read_dandi/test_read_dandi.py +++ b/tests/read_dandi/test_read_dandi.py @@ -47,7 +47,7 @@ def read_first_nwb_asset(): s3_url = first_asset.get_content_url(follow_redirects=1, strip_query=True) try: - with NWBHDF5IO(path=s3_url, load_namespaces=True, driver="ros3") as io: + with NWBHDF5IO(path=s3_url, driver="ros3") as io: io.read() except Exception as e: print(traceback.format_exc()) diff --git a/tests/unit/test_file.py b/tests/unit/test_file.py index c9bd98ad0..756009ff3 100644 --- a/tests/unit/test_file.py +++ b/tests/unit/test_file.py @@ -527,6 +527,7 @@ def test_subject_age_duration(self): class TestCacheSpec(TestCase): + """Test whether the file can be written and read when caching the spec.""" def 
setUp(self): self.path = 'unittest_cached_spec.nwb' @@ -535,18 +536,20 @@ def tearDown(self): remove_test_file(self.path) def test_simple(self): - nwbfile = NWBFile(' ', ' ', + nwbfile = NWBFile('sess_desc', 'identifier', datetime.now(tzlocal()), file_create_date=datetime.now(tzlocal()), institution='University of California, San Francisco', lab='Chang Lab') with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile) - with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: + with NWBHDF5IO(self.path, 'r') as reader: nwbfile = reader.read() + assert nwbfile.session_description == "sess_desc" class TestNoCacheSpec(TestCase): + """Test whether the file can be written and read when not caching the spec.""" def setUp(self): self.path = 'unittest_cached_spec.nwb' @@ -555,7 +558,7 @@ def tearDown(self): remove_test_file(self.path) def test_simple(self): - nwbfile = NWBFile(' ', ' ', + nwbfile = NWBFile('sess_desc', 'identifier', datetime.now(tzlocal()), file_create_date=datetime.now(tzlocal()), institution='University of California, San Francisco', @@ -563,8 +566,9 @@ def test_simple(self): with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile, cache_spec=False) - with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: + with NWBHDF5IO(self.path, 'r') as reader: nwbfile = reader.read() + assert nwbfile.session_description == "sess_desc" class TestTimestampsRefDefault(TestCase): diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 74ce0992c..6aa2ee25e 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -199,64 +199,54 @@ class TestValidateFunction(TestCase): # 1.0.3_nwbfile.nwb has cached "core" specification # 1.1.2_nwbfile.nwb has cached "core" and "hdmf-common" specificaitions + def get_io(self, path): + """Get an NWBHDF5IO object for the given path, ignoring the warning about ignoring cached namespaces.""" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + 
message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + return NWBHDF5IO(str(path), 'r') + def test_validate_io_no_cache(self): """Test that validating a file with no cached spec against the core namespace succeeds.""" - with NWBHDF5IO('tests/back_compat/1.0.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: errors = validate(io) self.assertEqual(errors, []) def test_validate_io_no_cache_bad_ns(self): """Test that validating a file with no cached spec against a specified, unknown namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.0.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""): validate(io, 'notfound') def test_validate_io_cached(self): """Test that validating a file with cached spec against its cached namespace succeeds.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: errors = validate(io) self.assertEqual(errors, []) def test_validate_io_cached_extension(self): """Test that validating a file with cached spec against its cached namespaces succeeds.""" - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", - message=r"Ignoring cached namespace .*", - category=UserWarning, - ) - with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: - errors = validate(io) - self.assertEqual(errors, []) + with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + errors = validate(io) + self.assertEqual(errors, []) def test_validate_io_cached_extension_pass_ns(self): """Test that validating a file with cached extension spec against the extension namespace succeeds.""" - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", - message=r"Ignoring cached namespace .*", - category=UserWarning, - ) - 
with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: - errors = validate(io, 'ndx-testextension') - self.assertEqual(errors, []) + with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + errors = validate(io, 'ndx-testextension') + self.assertEqual(errors, []) def test_validate_io_cached_core_with_io(self): """ For back-compatability, test that validating a file with cached extension spec against the core namespace succeeds when using the `io` + `namespace` keywords. """ - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", - message=r"Ignoring cached namespace .*", - category=UserWarning, - ) - with NWBHDF5IO( - path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb', mode='r', load_namespaces=True - ) as io: - results = validate(io=io, namespace="core") - self.assertEqual(results, []) + with self.get_io(path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + results = validate(io=io, namespace="core") + self.assertEqual(results, []) def test_validate_file_cached_extension(self): """ @@ -310,13 +300,13 @@ def test_validate_file_cached_no_cache_bad_ns(self): def test_validate_io_cached_bad_ns(self): """Test that validating a file with cached spec against a specified, unknown namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""): validate(io, 'notfound') def test_validate_io_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: # TODO this error should not be different from the error when using the validate script above msg = "builder must have data type defined with attribute 'data_type'" 
with self.assertRaisesWith(ValueError, msg):