From f395581eed97c19a3dfe7c6a71c1c74e8ff147a4 Mon Sep 17 00:00:00 2001
From: Miles Wells <k1o0@5tk.co>
Date: Thu, 11 Apr 2024 18:03:05 +0300
Subject: [PATCH 1/5] Resolves #120

---
 CHANGELOG.md                 |  9 ++++++++-
 one/__init__.py              |  2 +-
 one/alf/io.py                |  4 ++++
 one/tests/alf/test_alf_io.py | 11 +++++++++++
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed0b6383..9e401517 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.7.0]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.8.0]
+This version of ONE adds support for loading .npz files.
+
+### Modified
+
+- one.alf.io.load_fil_content loads npz files and returns only array if single compressed array with default name of 'arr_0'.
+
+## [2.7.0]
 This version of ONE adds support for Alyx 2.0.0 and pandas 3.0.0 with dataset QC filters. This version no longer supports 'data' search filter.
 
 ### Added
diff --git a/one/__init__.py b/one/__init__.py
index df50e79e..d2967c65 100644
--- a/one/__init__.py
+++ b/one/__init__.py
@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '2.7.0'
+__version__ = '2.8.0'
diff --git a/one/alf/io.py b/one/alf/io.py
index b2871684..6fba4d6c 100644
--- a/one/alf/io.py
+++ b/one/alf/io.py
@@ -307,6 +307,10 @@ def load_file_content(fil):
         return jsonable.read(fil)
     if fil.suffix == '.npy':
         return _ensure_flat(np.load(file=fil, allow_pickle=True))
+    if fil.suffix == '.npz':
+        arr = np.load(file=fil)
+        # If single array with the default name ('arr_0') return individual array
+        return arr['arr_0'] if set(arr.files) == {'arr_0'} else arr
     if fil.suffix == '.pqt':
         return parquet.load(fil)[0]
     if fil.suffix == '.ssv':
diff --git a/one/tests/alf/test_alf_io.py b/one/tests/alf/test_alf_io.py
index aff1735e..f4f4b160 100644
--- a/one/tests/alf/test_alf_io.py
+++ b/one/tests/alf/test_alf_io.py
@@ -520,6 +520,10 @@ def setUp(self) -> None:
         self.xyz = Path(self.tmpdir.name) / 'foo.baz.xyz'
         with open(self.xyz, 'wb') as f:
             f.write(b'\x00\x00')
+        self.npz1 = Path(self.tmpdir.name) / 'foo.baz.npz'
+        np.savez_compressed(self.npz1, np.random.rand(5))
+        self.npz2 = Path(self.tmpdir.name) / 'foo.bar.npz'
+        np.savez_compressed(self.npz2, np.random.rand(5), np.random.rand(5))
 
     def test_load_file_content(self):
         """Test for one.alf.io.load_file_content"""
@@ -550,6 +554,13 @@ def test_load_file_content(self):
         # Load YAML file
         loaded = alfio.load_file_content(str(self.yaml))
         self.assertCountEqual(loaded.keys(), ['a', 'b'])
+        # Load npz file
+        loaded = alfio.load_file_content(str(self.npz1))
+        self.assertIsInstance(loaded, np.ndarray, 'failed to unpack')
+        self.assertEqual(loaded.shape, (5,))
+        loaded = alfio.load_file_content(str(self.npz2))
+        self.assertIsInstance(loaded, np.lib.npyio.NpzFile, 'failed to return npz array')
+        self.assertEqual(loaded['arr_0'].shape, (5,))
 
     def tearDown(self) -> None:
         self.tmpdir.cleanup()

From 5adde39c3ecb65a42eb99430060b3de85493a983 Mon Sep 17 00:00:00 2001
From: Miles Wells <k1o0@3tk.co>
Date: Tue, 7 May 2024 13:01:09 +0300
Subject: [PATCH 2/5] Warn when instantiating registration client with caching

---
 CHANGELOG.md        | 1 +
 one/registration.py | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9e401517..2193e92e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ This version of ONE adds support for loading .npz files.
 ### Modified
 
 - one.alf.io.load_fil_content loads npz files and returns only array if single compressed array with default name of 'arr_0'.
+- log warning when instantiating RegistrationClient with AlyxClient REST cache active
 
 ## [2.7.0]
 This version of ONE adds support for Alyx 2.0.0 and pandas 3.0.0 with dataset QC filters. This version no longer supports 'data' search filter.
diff --git a/one/registration.py b/one/registration.py
index 05f0d374..11fbce14 100644
--- a/one/registration.py
+++ b/one/registration.py
@@ -94,6 +94,9 @@ def __init__(self, one=None):
         self.one = one
         if not one:
             self.one = ONE(cache_rest=None)
+        elif one.alyx.cache_mode == 'GET':
+            _logger.warning('AlyxClient REST cache active; '
+                            'this may cause issues with registration.')
         self.dtypes = list(map(Bunch, self.one.alyx.rest('dataset-types', 'list')))
         self.registration_patterns = [
             dt['filename_pattern'] for dt in self.dtypes if dt['filename_pattern']]

From 974a69c46f864f78db4ec9a227d1a52a08bcbed3 Mon Sep 17 00:00:00 2001
From: Miles Wells <k1o0@3tk.co>
Date: Tue, 7 May 2024 13:24:57 +0300
Subject: [PATCH 3/5] Fix test failure caused by Alyx endpoint change

---
 one/tests/test_alyxrest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/one/tests/test_alyxrest.py b/one/tests/test_alyxrest.py
index fdc4c93b..aabb4d34 100644
--- a/one/tests/test_alyxrest.py
+++ b/one/tests/test_alyxrest.py
@@ -258,6 +258,7 @@ def test_channels(self):
         self.addCleanup(self.alyx.rest, 'insertions', 'delete', id=probe_insertion['id'])
         trajectory = self.alyx.rest('trajectories', 'create', data={
             'probe_insertion': probe_insertion['id'],
+            'chronic_insertion': None,
             'x': 1500,
             'y': -2000,
             'z': 0,

From 30b607c153d7287bc5fff92feda17161e625fed6 Mon Sep 17 00:00:00 2001
From: Miles Wells <k1o0@5tk.co>
Date: Tue, 21 May 2024 20:02:29 +0300
Subject: [PATCH 4/5] Minor typo in CHANGELOG

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2193e92e..0efe4d77 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ This version of ONE adds support for loading .npz files.
 
 ### Modified
 
-- one.alf.io.load_fil_content loads npz files and returns only array if single compressed array with default name of 'arr_0'.
+- one.alf.io.load_file_content loads npz files and returns only array if single compressed array with default name of 'arr_0'.
 - log warning when instantiating RegistrationClient with AlyxClient REST cache active
 
 ## [2.7.0]

From e0db315749cb71c2ee5ec11a164650fb186c7444 Mon Sep 17 00:00:00 2001
From: Miles Wells <k1o0@5tk.co>
Date: Tue, 2 Jul 2024 14:02:13 +0300
Subject: [PATCH 5/5] bugfix in load_collection when one or more files missing

---
 .github/workflows/main.yaml            |  2 +-
 CHANGELOG.md                           |  1 +
 docs/notebooks/one_load/one_load.ipynb |  7 +++----
 one/api.py                             | 15 ++++++++-------
 one/tests/test_one.py                  |  6 +++---
 5 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 4c560dcc..ab2945af 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -35,7 +35,7 @@ jobs:
     - name: run tests
       run: |
         flake8 .
-        coverage run --omit=*one/tests/* -m unittest discover
+        coverage run --omit=one/tests/* -m unittest discover
     - name: Upload coverage data to coveralls.io
       run: coveralls --service=github
       env:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0efe4d77..b422fb85 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ This version of ONE adds support for loading .npz files.
 
 - one.alf.io.load_file_content loads npz files and returns only array if single compressed array with default name of 'arr_0'.
 - log warning when instantiating RegistrationClient with AlyxClient REST cache active
+- bugfix in load_collection when one or more files are missing
 
 ## [2.7.0]
 This version of ONE adds support for Alyx 2.0.0 and pandas 3.0.0 with dataset QC filters. This version no longer supports 'data' search filter.
diff --git a/docs/notebooks/one_load/one_load.ipynb b/docs/notebooks/one_load/one_load.ipynb
index f1ad5fdf..9ba340dc 100644
--- a/docs/notebooks/one_load/one_load.ipynb
+++ b/docs/notebooks/one_load/one_load.ipynb
@@ -286,7 +286,7 @@
     "revision will be returned.  The revisions are ordered lexicographically.\n",
     "\n",
     "```python\n",
-    "probe1_spikes = one.load_dataset(eid, 'trials.intervals.npy', revision='2021-03-15a')\n",
+    "intervals = one.load_dataset(eid, 'trials.intervals.npy', revision='2021-03-15a')\n",
     "```\n",
     "\n",
     "## Download only\n",
@@ -662,8 +662,7 @@
       "    filepath = one.load_dataset(eid '_ibl_trials.intervals.npy', download_only=True)\n",
       "    spike_times = one.load_dataset(eid 'spikes.times.npy', collection='alf/probe01')\n",
       "    old_spikes = one.load_dataset(eid, 'spikes.times.npy',\n",
-      "                                  collection='alf/probe01', revision='2020-08-31')\n",
-      "\n"
+      "                                  collection='alf/probe01', revision='2020-08-31')\n"
      ]
     }
    ],
@@ -733,4 +732,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
\ No newline at end of file
+}
diff --git a/one/api.py b/one/api.py
index a7b36754..9289336a 100644
--- a/one/api.py
+++ b/one/api.py
@@ -1357,7 +1357,7 @@ def load_collection(self,
             Query cache ('local') or Alyx database ('remote')
         download_only : bool
             When true the data are downloaded and the file path is returned.
-        **kwargs
+        kwargs
             Additional filters for datasets, including namespace and timescale. For full list
             see the one.alf.spec.describe function.
 
@@ -1394,18 +1394,19 @@ def load_collection(self,
         if len(datasets) == 0:
             raise alferr.ALFObjectNotFound(object or '')
         parts = [alfiles.rel_path_parts(x) for x in datasets.rel_path]
-        unique_objects = set(x[3] or '' for x in parts)
 
         # For those that don't exist, download them
         offline = None if query_type == 'auto' else self.mode == 'local'
         files = self._check_filesystem(datasets, offline=offline)
-        files = [x for x in files if x]
-        if not files:
+        if not any(files):
             raise alferr.ALFObjectNotFound(f'ALF collection "{collection}" not found on disk')
+        # Remove missing items
+        files, parts = zip(*[(x, y) for x, y in zip(files, parts) if x])
 
         if download_only:
             return files
 
+        unique_objects = set(x[3] or '' for x in parts)
         kwargs.update(wildcards=self.wildcards)
         collection = {
             obj: alfio.load_object([x for x, y in zip(files, parts) if y[3] == obj], **kwargs)
@@ -1424,7 +1425,7 @@ def setup(cache_dir=None, silent=False, **kwargs):
         silent : (False) bool
             When True will prompt for cache_dir, if cache_dir is None, and overwrite cache if any.
             When False will use cwd for cache_dir, if cache_dir is None, and use existing cache.
-        **kwargs
+        kwargs
             Optional arguments to pass to one.alf.cache.make_parquet_db.
 
         Returns
@@ -2498,7 +2499,7 @@ def setup(base_url=None, **kwargs):
         ----------
         base_url : str
             An Alyx database URL.  If None, the current default database is used.
-        **kwargs
+        kwargs
             Optional arguments to pass to one.params.setup.
 
         Returns
@@ -2785,7 +2786,7 @@ def _setup(**kwargs):
 
     Parameters
     ----------
-    **kwargs
+    kwargs
         See one.params.setup.
 
     Returns
diff --git a/one/tests/test_one.py b/one/tests/test_one.py
index 1f91a50c..530679c4 100644
--- a/one/tests/test_one.py
+++ b/one/tests/test_one.py
@@ -1338,7 +1338,7 @@ def test_list_datasets(self):
         self.one._cache['datasets'] = self.one._cache['datasets'].iloc[0:0].copy()
 
         dsets = self.one.list_datasets(self.eid, details=True, query_type='remote')
-        self.assertEqual(171, len(dsets))  # this may change after a BWM release or patch
+        self.assertEqual(183, len(dsets))  # this may change after a BWM release or patch
         self.assertEqual(1, dsets.index.nlevels, 'details data frame should be without eid index')
 
         # Test missing eid
@@ -1355,12 +1355,12 @@ def test_list_datasets(self):
         # Test details=False, with eid
         dsets = self.one.list_datasets(self.eid, details=False, query_type='remote')
         self.assertIsInstance(dsets, list)
-        self.assertEqual(171, len(dsets))  # this may change after a BWM release or patch
+        self.assertEqual(183, len(dsets))  # this may change after a BWM release or patch
 
         # Test with other filters
         dsets = self.one.list_datasets(self.eid, collection='*probe*', filename='*channels*',
                                        details=False, query_type='remote')
-        self.assertEqual(20, len(dsets))
+        self.assertEqual(24, len(dsets))
         self.assertTrue(all(x in y for x in ('probe', 'channels') for y in dsets))
 
         with self.assertWarns(Warning):