From eae8fb513b7d102d0e4ca6636f6ef234f523f0af Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Fri, 8 Nov 2024 12:06:49 -0800 Subject: [PATCH] Add NWBZarrIO.read_nwb convenience function (#226) * Fix #225 Add NWBZarrIO.read_nwb convenience function * Update Changelog --------- Co-authored-by: mavaylon1 --- CHANGELOG.md | 1 + src/hdmf_zarr/nwb.py | 28 ++++++++++++++- tests/unit/test_fsspec_streaming.py | 28 +++++++++++---- tests/unit/test_nwbzarrio.py | 56 +++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 tests/unit/test_nwbzarrio.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ecf61732..3c34edeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * Added test for opening file with consolidated metadata from DANDI. @mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206) * Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207) * Added link_data --> clear_cache relationship to support repacking zarr nwbfiles: [#215](https://github.com/hdmf-dev/hdmf-zarr/pull/215) +* Added `NWBZarrIO.read_nwb` convenience method to simplify reading an NWB file. 
@oruebel [#226](https://github.com/hdmf-dev/hdmf-zarr/pull/226) ## 0.8.0 (June 4, 2024) ### Bug Fixes diff --git a/src/hdmf_zarr/nwb.py b/src/hdmf_zarr/nwb.py index 582f1a63..762b6b3c 100644 --- a/src/hdmf_zarr/nwb.py +++ b/src/hdmf_zarr/nwb.py @@ -1,6 +1,7 @@ """Module with Zarr backend for NWB for integration with PyNWB""" from warnings import warn -from .backend import ZarrIO +from pathlib import Path +from .backend import ZarrIO, SUPPORTED_ZARR_STORES from hdmf.utils import (docval, popargs, @@ -63,5 +64,30 @@ def export(self, **kwargs): kwargs['container'] = nwbfile super().export(**kwargs) + @staticmethod + @docval({'name': 'path', + 'type': (str, Path, *SUPPORTED_ZARR_STORES), + 'doc': 'the path to the Zarr file or a supported Zarr store'}, + is_method=False) + def read_nwb(**kwargs): + """ + Helper factory method for reading an NWB file and return the NWBFile object + """ + # Retrieve the filepath + path = popargs('path', kwargs) + if isinstance(path, Path): + path = str(path) + # determine default storage options to use when opening a file from S3 + storage_options = {} + if isinstance(path, str) and path.startswith(("s3://")): + storage_options = dict(anon=True) + + # open the file with NWBZarrIO and read the file + io = NWBZarrIO(path=path, mode="r", load_namespaces=True, storage_options=storage_options) + nwbfile = io.read() + + # return the NWBFile object + return nwbfile + except ImportError: warn("PyNWB is not installed. 
Support for NWBZarrIO is disabled.") diff --git a/tests/unit/test_fsspec_streaming.py b/tests/unit/test_fsspec_streaming.py index cbb0bf06..05dc249c 100644 --- a/tests/unit/test_fsspec_streaming.py +++ b/tests/unit/test_fsspec_streaming.py @@ -8,17 +8,21 @@ class TestFSSpecStreaming(unittest.TestCase): - @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") - def test_fsspec_streaming(self): + + def setUp(self): # PLACEHOLDER test file from Allen Institute for Neural Dynamics # TODO: store a small test file and use it to speed up testing - remote_path = ( + self.s3_aind_path = ( "s3://aind-open-data/ecephys_625749_2022-08-03_15-15-06_nwb_2023-05-16_16-34-55/" "ecephys_625749_2022-08-03_15-15-06_nwb/" "ecephys_625749_2022-08-03_15-15-06_experiment1_recording1.nwb.zarr/" ) + # DANDISET: 000719/icephys_9_27_2024 + self.https_s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/7515c603-9940-4598-aa1b-8bf32dc9b10c/" - with NWBZarrIO(remote_path, mode="r", storage_options=dict(anon=True)) as io: + @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") + def test_fsspec_streaming(self): + with NWBZarrIO(self.s3_aind_path, mode="r", storage_options=dict(anon=True)) as io: nwbfile = io.read() self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06") @@ -32,10 +36,20 @@ def test_s3_open_with_consolidated_(self): """ The file is a Zarr file with consolidated metadata. 
""" - s3_path = "https://dandiarchive.s3.amazonaws.com/zarr/ccefbc9f-30e7-4a4c-b044-5b59d300040b/" - with NWBZarrIO(s3_path, mode='r') as read_io: + with NWBZarrIO(self.https_s3_path, mode='r') as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.ConsolidatedMetadataStore) - with NWBZarrIO(s3_path, mode='-r') as read_io: + with NWBZarrIO(self.https_s3_path, mode='-r') as read_io: read_io.open() self.assertIsInstance(read_io.file.store, zarr.storage.FSStore) + + + @unittest.skipIf(not HAVE_FSSPEC, "fsspec not installed") + def test_fsspec_streaming_via_read_nwb(self): + """ + Test reading from s3 using the convenience function NWBZarrIO.read_nwb + """ + # Test with a s3:// URL + nwbfile = NWBZarrIO.read_nwb(self.s3_aind_path) + self.assertEqual(nwbfile.identifier, "ecephys_625749_2022-08-03_15-15-06") + self.assertEqual(nwbfile.institution, "AIND") diff --git a/tests/unit/test_nwbzarrio.py b/tests/unit/test_nwbzarrio.py new file mode 100644 index 00000000..ff408594 --- /dev/null +++ b/tests/unit/test_nwbzarrio.py @@ -0,0 +1,56 @@ +import unittest +from hdmf_zarr import NWBZarrIO +import os +import shutil +from datetime import datetime +from dateutil.tz import tzlocal + +try: + from pynwb import NWBFile + PYNWB_AVAILABLE = True +except ImportError: + PYNWB_AVAILABLE = False + + +@unittest.skipIf(not PYNWB_AVAILABLE, "PyNWB not installed") +class TestNWBZarrIO(unittest.TestCase): + + def setUp(self): + self.filepath = "test_io.zarr" + + def tearDown(self): + if os.path.exists(self.filepath): + shutil.rmtree(self.filepath) + + def write_test_file(self): + # Create the NWBFile + nwbfile = NWBFile( + session_description="my first synthetic recording", + identifier="EXAMPLE_ID", + session_start_time=datetime.now(tzlocal()), + experimenter="Dr. 
Bilbo Baggins", + lab="Bag End Laboratory", + institution="University of Middle Earth at the Shire", + experiment_description="I went on an adventure with thirteen dwarves " + "to reclaim vast treasures.", + session_id="LONELYMTN", + ) + + # Create a device + _ = nwbfile.create_device( + name="array", description="the best array", manufacturer="Probe Company 9000" + ) + with NWBZarrIO(path=self.filepath, mode="w") as io: + io.write(nwbfile) + + def test_read_nwb(self): + """ + Test reading a local file with NWBZarrIO.read_nwb. + + NOTE: See TestFSSpecStreaming.test_fsspec_streaming_via_read_nwb for corresponding tests + for reading a remote file with NWBZarrIO.read_nwb + """ + self.write_test_file() + nwbfile = NWBZarrIO.read_nwb(path=self.filepath) + self.assertEqual(len(nwbfile.devices), 1) + self.assertTupleEqual(nwbfile.experimenter, ('Dr. Bilbo Baggins',))