From f395581eed97c19a3dfe7c6a71c1c74e8ff147a4 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Thu, 11 Apr 2024 18:03:05 +0300 Subject: [PATCH 1/5] Resolves #120 --- CHANGELOG.md | 9 ++++++++- one/__init__.py | 2 +- one/alf/io.py | 4 ++++ one/tests/alf/test_alf_io.py | 11 +++++++++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed0b6383..9e401517 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog -## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.7.0] +## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.8.0] +This version of ONE adds support for loading .npz files. + +### Modified + +- one.alf.io.load_fil_content loads npz files and returns only array if single compressed array with default name of 'arr_0'. + +## [2.7.0] This version of ONE adds support for Alyx 2.0.0 and pandas 3.0.0 with dataset QC filters. This version no longer supports 'data' search filter. ### Added diff --git a/one/__init__.py b/one/__init__.py index df50e79e..d2967c65 100644 --- a/one/__init__.py +++ b/one/__init__.py @@ -1,2 +1,2 @@ """The Open Neurophysiology Environment (ONE) API.""" -__version__ = '2.7.0' +__version__ = '2.8.0' diff --git a/one/alf/io.py b/one/alf/io.py index b2871684..6fba4d6c 100644 --- a/one/alf/io.py +++ b/one/alf/io.py @@ -307,6 +307,10 @@ def load_file_content(fil): return jsonable.read(fil) if fil.suffix == '.npy': return _ensure_flat(np.load(file=fil, allow_pickle=True)) + if fil.suffix == '.npz': + arr = np.load(file=fil) + # If single array with the default name ('arr_0') return individual array + return arr['arr_0'] if set(arr.files) == {'arr_0'} else arr if fil.suffix == '.pqt': return parquet.load(fil)[0] if fil.suffix == '.ssv': diff --git a/one/tests/alf/test_alf_io.py b/one/tests/alf/test_alf_io.py index aff1735e..f4f4b160 100644 --- a/one/tests/alf/test_alf_io.py +++ b/one/tests/alf/test_alf_io.py @@ -520,6 +520,10 @@ def setUp(self) -> None: self.xyz = Path(self.tmpdir.name) / 'foo.baz.xyz' with open(self.xyz, 'wb') as f: f.write(b'\x00\x00') + self.npz1 = Path(self.tmpdir.name) / 'foo.baz.npz' + np.savez_compressed(self.npz1, np.random.rand(5)) + self.npz2 = Path(self.tmpdir.name) / 'foo.bar.npz' + np.savez_compressed(self.npz2, np.random.rand(5), np.random.rand(5)) def test_load_file_content(self): """Test for one.alf.io.load_file_content""" @@ -550,6 +554,13 @@ def test_load_file_content(self): # Load YAML file loaded = alfio.load_file_content(str(self.yaml)) self.assertCountEqual(loaded.keys(), ['a', 'b']) + # Load npz file + loaded = alfio.load_file_content(str(self.npz1)) + self.assertIsInstance(loaded, np.ndarray, 'failed to unpack') + self.assertEqual(loaded.shape, (5,)) + loaded = alfio.load_file_content(str(self.npz2)) + self.assertIsInstance(loaded, np.lib.npyio.NpzFile, 'failed to return npz array') + self.assertEqual(loaded['arr_0'].shape, (5,)) def tearDown(self) -> None: self.tmpdir.cleanup() From 5adde39c3ecb65a42eb99430060b3de85493a983 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Tue, 7 May 2024 13:01:09 +0300 Subject: [PATCH 2/5] Warn when instantiating registration client with caching --- CHANGELOG.md | 1 + one/registration.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e401517..2193e92e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ This version of ONE adds support for loading .npz files. ### Modified - one.alf.io.load_fil_content loads npz files and returns only array if single compressed array with default name of 'arr_0'. +- log warning when instantiating RegistrationClient with AlyxClient REST cache active ## [2.7.0] This version of ONE adds support for Alyx 2.0.0 and pandas 3.0.0 with dataset QC filters. This version no longer supports 'data' search filter. diff --git a/one/registration.py b/one/registration.py index 05f0d374..11fbce14 100644 --- a/one/registration.py +++ b/one/registration.py @@ -94,6 +94,9 @@ def __init__(self, one=None): self.one = one if not one: self.one = ONE(cache_rest=None) + elif one.alyx.cache_mode == 'GET': + _logger.warning('AlyxClient REST cache active; ' + 'this may cause issues with registration.') self.dtypes = list(map(Bunch, self.one.alyx.rest('dataset-types', 'list'))) self.registration_patterns = [ dt['filename_pattern'] for dt in self.dtypes if dt['filename_pattern']] From 974a69c46f864f78db4ec9a227d1a52a08bcbed3 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Tue, 7 May 2024 13:24:57 +0300 Subject: [PATCH 3/5] Fix test failure caused by Alyx endpoint change --- one/tests/test_alyxrest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/one/tests/test_alyxrest.py b/one/tests/test_alyxrest.py index fdc4c93b..aabb4d34 100644 --- a/one/tests/test_alyxrest.py +++ b/one/tests/test_alyxrest.py @@ -258,6 +258,7 @@ def test_channels(self): self.addCleanup(self.alyx.rest, 'insertions', 'delete', id=probe_insertion['id']) trajectory = self.alyx.rest('trajectories', 'create', data={ 'probe_insertion': probe_insertion['id'], + 'chronic_insertion': None, 'x': 1500, 'y': -2000, 'z': 0, From 30b607c153d7287bc5fff92feda17161e625fed6 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Tue, 21 May 2024 20:02:29 +0300 Subject: [PATCH 4/5] Minor typo in CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2193e92e..0efe4d77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ This version of ONE adds support for loading .npz files. ### Modified -- one.alf.io.load_fil_content loads npz files and returns only array if single compressed array with default name of 'arr_0'. +- one.alf.io.load_file_content loads npz files and returns only array if single compressed array with default name of 'arr_0'. - log warning when instantiating RegistrationClient with AlyxClient REST cache active ## [2.7.0] From e0db315749cb71c2ee5ec11a164650fb186c7444 Mon Sep 17 00:00:00 2001 From: Miles Wells Date: Tue, 2 Jul 2024 14:02:13 +0300 Subject: [PATCH 5/5] bugfix in load_collection when one or more files missing --- .github/workflows/main.yaml | 2 +- CHANGELOG.md | 1 + docs/notebooks/one_load/one_load.ipynb | 7 +++---- one/api.py | 15 ++++++++------- one/tests/test_one.py | 6 +++--- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 4c560dcc..ab2945af 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -35,7 +35,7 @@ jobs: - name: run tests run: | flake8 . - coverage run --omit=*one/tests/* -m unittest discover + coverage run --omit=one/tests/* -m unittest discover - name: Upload coverage data to coveralls.io run: coveralls --service=github env: diff --git a/CHANGELOG.md b/CHANGELOG.md index 0efe4d77..b422fb85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ This version of ONE adds support for loading .npz files. - one.alf.io.load_file_content loads npz files and returns only array if single compressed array with default name of 'arr_0'. - log warning when instantiating RegistrationClient with AlyxClient REST cache active +- bugfix in load_collection when one or more files missing ## [2.7.0] This version of ONE adds support for Alyx 2.0.0 and pandas 3.0.0 with dataset QC filters. This version no longer supports 'data' search filter. diff --git a/docs/notebooks/one_load/one_load.ipynb b/docs/notebooks/one_load/one_load.ipynb index f1ad5fdf..9ba340dc 100644 --- a/docs/notebooks/one_load/one_load.ipynb +++ b/docs/notebooks/one_load/one_load.ipynb @@ -286,7 +286,7 @@ "revision will be returned. The revisions are ordered lexicographically.\n", "\n", "```python\n", - "probe1_spikes = one.load_dataset(eid, 'trials.intervals.npy', revision='2021-03-15a')\n", + "intervals = one.load_dataset(eid, 'trials.intervals.npy', revision='2021-03-15a')\n", "```\n", "\n", "## Download only\n", @@ -662,8 +662,7 @@ " filepath = one.load_dataset(eid '_ibl_trials.intervals.npy', download_only=True)\n", " spike_times = one.load_dataset(eid 'spikes.times.npy', collection='alf/probe01')\n", " old_spikes = one.load_dataset(eid, 'spikes.times.npy',\n", - " collection='alf/probe01', revision='2020-08-31')\n", - "\n" + " collection='alf/probe01', revision='2020-08-31')\n" ] } ], @@ -733,4 +732,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/one/api.py b/one/api.py index a7b36754..9289336a 100644 --- a/one/api.py +++ b/one/api.py @@ -1357,7 +1357,7 @@ def load_collection(self, Query cache ('local') or Alyx database ('remote') download_only : bool When true the data are downloaded and the file path is returned. - **kwargs + kwargs Additional filters for datasets, including namespace and timescale. For full list see the one.alf.spec.describe function. @@ -1394,18 +1394,19 @@ def load_collection(self, if len(datasets) == 0: raise alferr.ALFObjectNotFound(object or '') parts = [alfiles.rel_path_parts(x) for x in datasets.rel_path] - unique_objects = set(x[3] or '' for x in parts) # For those that don't exist, download them offline = None if query_type == 'auto' else self.mode == 'local' files = self._check_filesystem(datasets, offline=offline) - files = [x for x in files if x] - if not files: + if not any(files): raise alferr.ALFObjectNotFound(f'ALF collection "{collection}" not found on disk') + # Remove missing items + files, parts = zip(*[(x, y) for x, y in zip(files, parts) if x]) if download_only: return files + unique_objects = set(x[3] or '' for x in parts) kwargs.update(wildcards=self.wildcards) collection = { obj: alfio.load_object([x for x, y in zip(files, parts) if y[3] == obj], **kwargs) @@ -1424,7 +1425,7 @@ def setup(cache_dir=None, silent=False, **kwargs): silent : (False) bool When True will prompt for cache_dir, if cache_dir is None, and overwrite cache if any. When False will use cwd for cache_dir, if cache_dir is None, and use existing cache. - **kwargs + kwargs Optional arguments to pass to one.alf.cache.make_parquet_db. Returns @@ -2498,7 +2499,7 @@ def setup(base_url=None, **kwargs): ---------- base_url : str An Alyx database URL. If None, the current default database is used. - **kwargs + kwargs Optional arguments to pass to one.params.setup. Returns @@ -2785,7 +2786,7 @@ def _setup(**kwargs): Parameters ---------- - **kwargs + kwargs See one.params.setup. Returns diff --git a/one/tests/test_one.py b/one/tests/test_one.py index 1f91a50c..530679c4 100644 --- a/one/tests/test_one.py +++ b/one/tests/test_one.py @@ -1338,7 +1338,7 @@ def test_list_datasets(self): self.one._cache['datasets'] = self.one._cache['datasets'].iloc[0:0].copy() dsets = self.one.list_datasets(self.eid, details=True, query_type='remote') - self.assertEqual(171, len(dsets)) # this may change after a BWM release or patch + self.assertEqual(183, len(dsets)) # this may change after a BWM release or patch self.assertEqual(1, dsets.index.nlevels, 'details data frame should be without eid index') # Test missing eid @@ -1355,12 +1355,12 @@ def test_list_datasets(self): # Test details=False, with eid dsets = self.one.list_datasets(self.eid, details=False, query_type='remote') self.assertIsInstance(dsets, list) - self.assertEqual(171, len(dsets)) # this may change after a BWM release or patch + self.assertEqual(183, len(dsets)) # this may change after a BWM release or patch # Test with other filters dsets = self.one.list_datasets(self.eid, collection='*probe*', filename='*channels*', details=False, query_type='remote') - self.assertEqual(20, len(dsets)) + self.assertEqual(24, len(dsets)) self.assertTrue(all(x in y for x in ('probe', 'channels') for y in dsets)) with self.assertWarns(Warning):