diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 4c560dcc..ab2945af 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -35,7 +35,7 @@ jobs:
       - name: run tests
         run: |
           flake8 .
-          coverage run --omit=*one/tests/* -m unittest discover
+          coverage run --omit=one/tests/* -m unittest discover
       - name: Upload coverage data to coveralls.io
         run: coveralls --service=github
         env:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed0b6383..b422fb85 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,14 @@
 # Changelog
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.7.0]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.8.0]
+This version of ONE adds support for loading .npz files.
+
+### Modified
+
+- one.alf.io.load_file_content loads .npz files and returns the array itself when the archive contains a single compressed array with the default name 'arr_0'.
+- Log a warning when instantiating RegistrationClient while the AlyxClient REST cache is active.
+- Bug fix in load_collection when one or more files are missing.
+
+## [2.7.0]
 This version of ONE adds support for Alyx 2.0.0 and pandas 3.0.0 with dataset QC filters.
 This version no longer supports 'data' search filter.
 
 ### Added
diff --git a/docs/notebooks/one_load/one_load.ipynb b/docs/notebooks/one_load/one_load.ipynb
index f1ad5fdf..9ba340dc 100644
--- a/docs/notebooks/one_load/one_load.ipynb
+++ b/docs/notebooks/one_load/one_load.ipynb
@@ -286,7 +286,7 @@
    "revision will be returned. The revisions are ordered lexicographically.\n",
    "\n",
    "```python\n",
-   "probe1_spikes = one.load_dataset(eid, 'trials.intervals.npy', revision='2021-03-15a')\n",
+   "intervals = one.load_dataset(eid, 'trials.intervals.npy', revision='2021-03-15a')\n",
    "```\n",
    "\n",
    "## Download only\n",
@@ -662,8 +662,7 @@
    "    filepath = one.load_dataset(eid, '_ibl_trials.intervals.npy', download_only=True)\n",
    "    spike_times = one.load_dataset(eid, 'spikes.times.npy', collection='alf/probe01')\n",
    "    old_spikes = one.load_dataset(eid, 'spikes.times.npy',\n",
-   "                                  collection='alf/probe01', revision='2020-08-31')\n",
-   "\n"
+   "                                  collection='alf/probe01', revision='2020-08-31')\n"
    ]
   },
@@ -733,4 +732,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}
diff --git a/one/__init__.py b/one/__init__.py
index df50e79e..d2967c65 100644
--- a/one/__init__.py
+++ b/one/__init__.py
@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '2.7.0'
+__version__ = '2.8.0'
diff --git a/one/alf/io.py b/one/alf/io.py
index b2871684..6fba4d6c 100644
--- a/one/alf/io.py
+++ b/one/alf/io.py
@@ -307,6 +307,10 @@ def load_file_content(fil):
         return jsonable.read(fil)
     if fil.suffix == '.npy':
         return _ensure_flat(np.load(file=fil, allow_pickle=True))
+    if fil.suffix == '.npz':
+        arr = np.load(file=fil)
+        # If single array with the default name ('arr_0'), return the array itself
+        return arr['arr_0'] if set(arr.files) == {'arr_0'} else arr
     if fil.suffix == '.pqt':
         return parquet.load(fil)[0]
     if fil.suffix == '.ssv':
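A quick sketch of the new .npz behaviour added to load_file_content above (not part of the patch; the file names are illustrative, and 'arr_0' is NumPy's default key for unnamed arrays passed to np.savez):

```python
import numpy as np
from one.alf import io as alfio

# A single unnamed array is unpacked to a plain ndarray...
np.savez_compressed('single.npz', np.arange(5))
arr = alfio.load_file_content('single.npz')
assert isinstance(arr, np.ndarray)  # the lone 'arr_0' entry is returned directly

# ...while an archive of several arrays comes back as the NpzFile mapping
np.savez_compressed('multi.npz', np.arange(5), np.arange(3))
arrs = alfio.load_file_content('multi.npz')
assert set(arrs.files) == {'arr_0', 'arr_1'}
```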
diff --git a/one/api.py b/one/api.py
index a7b36754..9289336a 100644
--- a/one/api.py
+++ b/one/api.py
@@ -1357,7 +1357,7 @@ def load_collection(self,
             Query cache ('local') or Alyx database ('remote')
         download_only : bool
             When true the data are downloaded and the file path is returned.
-        **kwargs
+        kwargs
             Additional filters for datasets, including namespace and timescale. For full list
             see the one.alf.spec.describe function.
 
@@ -1394,18 +1394,19 @@ def load_collection(self,
         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(object or '')
         parts = [alfiles.rel_path_parts(x) for x in datasets.rel_path]
-        unique_objects = set(x[3] or '' for x in parts)
 
         # For those that don't exist, download them
         offline = None if query_type == 'auto' else self.mode == 'local'
         files = self._check_filesystem(datasets, offline=offline)
-        files = [x for x in files if x]
-        if not files:
+        if not any(files):
             raise alferr.ALFObjectNotFound(f'ALF collection "{collection}" not found on disk')
+        # Remove missing items
+        files, parts = zip(*[(x, y) for x, y in zip(files, parts) if x])
 
         if download_only:
             return files
 
+        unique_objects = set(x[3] or '' for x in parts)
         kwargs.update(wildcards=self.wildcards)
         collection = {
             obj: alfio.load_object([x for x, y in zip(files, parts) if y[3] == obj], **kwargs)
@@ -1424,7 +1425,7 @@ def setup(cache_dir=None, silent=False, **kwargs):
     silent : (False) bool
         When True will prompt for cache_dir, if cache_dir is None, and overwrite cache if any.
         When False will use cwd for cache_dir, if cache_dir is None, and use existing cache.
-    **kwargs
+    kwargs
         Optional arguments to pass to one.alf.cache.make_parquet_db.
 
     Returns
@@ -2498,7 +2499,7 @@ def setup(base_url=None, **kwargs):
     ----------
     base_url : str
         An Alyx database URL. If None, the current default database is used.
-    **kwargs
+    kwargs
         Optional arguments to pass to one.params.setup.
 
     Returns
@@ -2785,7 +2786,7 @@ def _setup(**kwargs):
 
     Parameters
     ----------
-    **kwargs
+    kwargs
         See one.params.setup.
 
     Returns
diff --git a/one/registration.py b/one/registration.py
index 05f0d374..11fbce14 100644
--- a/one/registration.py
+++ b/one/registration.py
@@ -94,6 +94,9 @@ def __init__(self, one=None):
         self.one = one
         if not one:
             self.one = ONE(cache_rest=None)
+        elif one.alyx.cache_mode == 'GET':
+            _logger.warning('AlyxClient REST cache active; '
+                            'this may cause issues with registration.')
         self.dtypes = list(map(Bunch, self.one.alyx.rest('dataset-types', 'list')))
         self.registration_patterns = [
             dt['filename_pattern'] for dt in self.dtypes if dt['filename_pattern']]
diff --git a/one/tests/alf/test_alf_io.py b/one/tests/alf/test_alf_io.py
index aff1735e..f4f4b160 100644
--- a/one/tests/alf/test_alf_io.py
+++ b/one/tests/alf/test_alf_io.py
@@ -520,6 +520,10 @@ def setUp(self) -> None:
         self.xyz = Path(self.tmpdir.name) / 'foo.baz.xyz'
         with open(self.xyz, 'wb') as f:
             f.write(b'\x00\x00')
+        self.npz1 = Path(self.tmpdir.name) / 'foo.baz.npz'
+        np.savez_compressed(self.npz1, np.random.rand(5))
+        self.npz2 = Path(self.tmpdir.name) / 'foo.bar.npz'
+        np.savez_compressed(self.npz2, np.random.rand(5), np.random.rand(5))
 
     def test_load_file_content(self):
         """Test for one.alf.io.load_file_content"""
@@ -550,6 +554,13 @@ def test_load_file_content(self):
         # Load YAML file
         loaded = alfio.load_file_content(str(self.yaml))
         self.assertCountEqual(loaded.keys(), ['a', 'b'])
+        # Load npz file
+        loaded = alfio.load_file_content(str(self.npz1))
+        self.assertIsInstance(loaded, np.ndarray, 'failed to unpack')
+        self.assertEqual(loaded.shape, (5,))
+        loaded = alfio.load_file_content(str(self.npz2))
+        self.assertIsInstance(loaded, np.lib.npyio.NpzFile, 'failed to return npz array')
+        self.assertEqual(loaded['arr_0'].shape, (5,))
 
     def tearDown(self) -> None:
         self.tmpdir.cleanup()
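To illustrate what the load_collection change above fixes (a sketch, not part of the patch; the eid and collection name are placeholders): previously `files` was filtered for missing items while `parts` was not, so the two lists could fall out of alignment; the fix filters them together.

```python
from one.api import ONE

one = ONE()
eid = 'aad23144-0e52-4eac-80c5-c4ee2decb198'  # placeholder experiment ID

# download_only=True returns file paths; missing datasets are now dropped
# from `files` and `parts` together, keeping the two aligned
files = one.load_collection(eid, 'alf/probe01', download_only=True)

# Otherwise a dict-like of ALF objects keyed by object name is returned
probe01 = one.load_collection(eid, 'alf/probe01', object='spikes')
spike_times = probe01['spikes']['times']
```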
diff --git a/one/tests/test_alyxrest.py b/one/tests/test_alyxrest.py
index fdc4c93b..aabb4d34 100644
--- a/one/tests/test_alyxrest.py
+++ b/one/tests/test_alyxrest.py
@@ -258,6 +258,7 @@ def test_channels(self):
         self.addCleanup(self.alyx.rest, 'insertions', 'delete', id=probe_insertion['id'])
         trajectory = self.alyx.rest('trajectories', 'create', data={
             'probe_insertion': probe_insertion['id'],
+            'chronic_insertion': None,
             'x': 1500,
             'y': -2000,
             'z': 0,
diff --git a/one/tests/test_one.py b/one/tests/test_one.py
index 1f91a50c..530679c4 100644
--- a/one/tests/test_one.py
+++ b/one/tests/test_one.py
@@ -1338,7 +1338,7 @@ def test_list_datasets(self):
         self.one._cache['datasets'] = self.one._cache['datasets'].iloc[0:0].copy()
 
         dsets = self.one.list_datasets(self.eid, details=True, query_type='remote')
-        self.assertEqual(171, len(dsets))  # this may change after a BWM release or patch
+        self.assertEqual(183, len(dsets))  # this may change after a BWM release or patch
         self.assertEqual(1, dsets.index.nlevels, 'details data frame should be without eid index')
 
         # Test missing eid
@@ -1355,12 +1355,12 @@ def test_list_datasets(self):
         # Test details=False, with eid
         dsets = self.one.list_datasets(self.eid, details=False, query_type='remote')
         self.assertIsInstance(dsets, list)
-        self.assertEqual(171, len(dsets))  # this may change after a BWM release or patch
+        self.assertEqual(183, len(dsets))  # this may change after a BWM release or patch
 
         # Test with other filters
         dsets = self.one.list_datasets(self.eid, collection='*probe*', filename='*channels*',
                                        details=False, query_type='remote')
-        self.assertEqual(20, len(dsets))
+        self.assertEqual(24, len(dsets))
         self.assertTrue(all(x in y for x in ('probe', 'channels') for y in dsets))
 
         with self.assertWarns(Warning):
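Finally, a sketch of when the new RegistrationClient warning fires (not part of the patch; the base_url is a placeholder, and 'GET' is assumed to be AlyxClient's default REST cache mode, as the elif branch in one/registration.py implies):

```python
from one.api import ONE
from one.registration import RegistrationClient

one = ONE(base_url='https://alyx.example.org')  # placeholder database URL

# With the REST cache serving cached GET responses, stale records could be
# returned during registration, hence the warning
if one.alyx.cache_mode == 'GET':
    client = RegistrationClient(one)  # logs: 'AlyxClient REST cache active; ...'

# Constructing the client without an instance sidesteps this: it builds
# its own ONE with the REST cache disabled (cache_rest=None)
client = RegistrationClient()
```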