From d6a294895716d02d9c2de3646f01a39f785eb61a Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Wed, 17 Jan 2024 14:21:56 +0200
Subject: [PATCH] fixes #109

---
 CHANGELOG.md          | 14 ++++++++++----
 one/__init__.py       |  2 +-
 one/alf/cache.py      |  2 +-
 one/api.py            |  8 +++++---
 one/tests/test_one.py |  7 +++++++
 5 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6fcde8c5..ee170ba7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,21 +1,27 @@
 # Changelog
 
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.5.3]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.5.4]
 
 ### Modified
 
-- support non-zero-padded sequence paths in ConvertersMixin.path2ref, e.g. subject/2020-01-01/1
+- HOTFIX: initialize empty One cache tables with correct columns
+
+## [2.5.3]
+
+### Modified
+
+- HOTFIX: support non-zero-padded sequence paths in ConvertersMixin.path2ref, e.g. subject/2020-01-01/1
 
 ## [2.5.2]
 
 ### Modified
 
-- handle data urls that have URL parts before 'aggregates/' in OneAlyx.list_aggregates method
+- HOTFIX: handle data urls that have URL parts before 'aggregates/' in OneAlyx.list_aggregates method
 
 ## [2.5.1]
 
 ### Modified
 
-- exclude irrelevant s3 objects with source name in key, e.g. for foo/bar exclude foo/bar_baz/ key
+- HOTFIX: exclude irrelevant s3 objects with source name in key, e.g. for foo/bar exclude foo/bar_baz/ key
 
 ## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.5.0]
diff --git a/one/__init__.py b/one/__init__.py
index 06164ae1..c72fcb00 100644
--- a/one/__init__.py
+++ b/one/__init__.py
@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '2.5.3'
+__version__ = '2.5.4'
diff --git a/one/alf/cache.py b/one/alf/cache.py
index 586b9bc6..8034c826 100644
--- a/one/alf/cache.py
+++ b/one/alf/cache.py
@@ -31,7 +31,7 @@
 from one.alf.files import session_path_parts, get_alf_path
 from one.converters import session_record2path
 
-__all__ = ['make_parquet_db', 'remove_missing_datasets']
+__all__ = ['make_parquet_db', 'remove_missing_datasets', 'DATASETS_COLUMNS', 'SESSIONS_COLUMNS']
 _logger = logging.getLogger(__name__)
 
 # -------------------------------------------------------------------------------------------------
diff --git a/one/api.py b/one/api.py
index 6c7340b6..7b1d6b03 100644
--- a/one/api.py
+++ b/one/api.py
@@ -26,7 +26,7 @@
 import one.alf.io as alfio
 import one.alf.files as alfiles
 import one.alf.exceptions as alferr
-from .alf.cache import make_parquet_db
+from .alf.cache import make_parquet_db, DATASETS_COLUMNS, SESSIONS_COLUMNS
 from .alf.spec import is_uuid_string
 from . import __version__
 from one.converters import ConversionMixin, session_record2path
@@ -148,7 +148,9 @@ def load_cache(self, tables_dir=None, **kwargs):
             # No tables present
             meta['expired'] = True
             meta['raw'] = {}
-            self._cache.update({'datasets': pd.DataFrame(), 'sessions': pd.DataFrame()})
+            self._cache.update({
+                'datasets': pd.DataFrame(columns=DATASETS_COLUMNS).set_index(['eid', 'id']),
+                'sessions': pd.DataFrame(columns=SESSIONS_COLUMNS).set_index('id')})
             if self.offline:  # In online mode, the cache tables should be downloaded later
                 warnings.warn(f'No cache tables found in {self._tables_dir}')
         created = [datetime.fromisoformat(x['date_created'])
@@ -290,7 +292,7 @@ def _update_cache_from_records(self, strict=False, **kwargs):
                 to_drop = set(records.columns) - set(self._cache[table].columns)
                 records.drop(to_drop, axis=1, inplace=True)
             records = records.reindex(columns=self._cache[table].columns)
-            assert all(self._cache[table].columns == records.columns)
+            assert set(self._cache[table].columns) == set(records.columns)
             # Update existing rows
             to_update = records.index.isin(self._cache[table].index)
             self._cache[table].loc[records.index[to_update], :] = records[to_update]
diff --git a/one/tests/test_one.py b/one/tests/test_one.py
index 9565c961..3dac9436 100644
--- a/one/tests/test_one.py
+++ b/one/tests/test_one.py
@@ -796,6 +796,13 @@ def test_update_cache_from_records(self):
         with self.assertRaises(KeyError):
             self.one._update_cache_from_records(unknown=datasets)
         self.assertIsNone(self.one._update_cache_from_records(datasets=None))
+        # Absent cache table
+        self.one.load_cache(tables_dir='/foo')
+        self.one._update_cache_from_records(sessions=session, datasets=dataset)
+        self.assertTrue(all(self.one._cache.sessions == pd.DataFrame([session])))
+        self.assertEqual(1, len(self.one._cache.datasets))
+        self.assertEqual(self.one._cache.datasets.squeeze().name, dataset.name)
+        self.assertCountEqual(self.one._cache.datasets.squeeze().to_dict(), dataset.to_dict())
 
     def test_save_loaded_ids(self):
         """Test One.save_loaded_ids and logic within One._check_filesystem"""
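
For reviewers, a minimal standalone sketch of the behaviour this patch addresses, assuming only pandas. The column tuples below are illustrative stand-ins for the real `DATASETS_COLUMNS` and `SESSIONS_COLUMNS` constants exported from `one/alf/cache.py`: only the `id`/`eid` index fields are taken from the diff, the remaining field names are hypothetical.

```python
import pandas as pd

# Illustrative stand-ins for one.alf.cache.SESSIONS_COLUMNS / DATASETS_COLUMNS.
# Only 'id' and 'eid' (the index columns set in the patch) come from the diff;
# the other field names here are hypothetical placeholders.
SESSIONS_COLUMNS = ('id', 'subject', 'date', 'number')
DATASETS_COLUMNS = ('id', 'eid', 'rel_path', 'exists')

# Before the fix: tables created with a bare pd.DataFrame() have no columns
# and a default RangeIndex, so later updates keyed on 'id'/'eid' misalign.
before = pd.DataFrame()
assert before.columns.empty and before.index.name is None

# After the fix: empty tables carry the full schema and the expected index,
# so _update_cache_from_records can reindex and insert records cleanly.
sessions = pd.DataFrame(columns=SESSIONS_COLUMNS).set_index('id')
datasets = pd.DataFrame(columns=DATASETS_COLUMNS).set_index(['eid', 'id'])

# Inserting a record into the empty table now preserves the schema.
record = pd.DataFrame([{'id': 'abc', 'subject': 'SW001',
                        'date': '2024-01-17', 'number': 1}])
sessions = pd.concat([sessions, record.set_index('id')])
print(sessions.columns.tolist())  # ['subject', 'date', 'number'] (schema kept)
print(datasets.index.names)       # ['eid', 'id']
```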