From d6a294895716d02d9c2de3646f01a39f785eb61a Mon Sep 17 00:00:00 2001
From: Miles Wells
Date: Wed, 17 Jan 2024 14:21:56 +0200
Subject: [PATCH] fixes #109

---
 CHANGELOG.md          | 14 ++++++++++----
 one/__init__.py       |  2 +-
 one/alf/cache.py      |  2 +-
 one/api.py            |  8 +++++---
 one/tests/test_one.py |  7 +++++++
 5 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6fcde8c5..ee170ba7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,21 +1,27 @@
 # Changelog
 
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.5.3]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.5.4]
 
 ### Modified
 
-- support non-zero-padded sequence paths in ConvertersMixin.path2ref, e.g. subject/2020-01-01/1
+- HOTFIX: initialize empty One cache tables with correct columns
+
+## [2.5.3]
+
+### Modified
+
+- HOTFIX: support non-zero-padded sequence paths in ConvertersMixin.path2ref, e.g. subject/2020-01-01/1
 
 ## [2.5.2]
 
 ### Modified
 
-- handle data urls that have URL parts before 'aggregates/' in OneAlyx.list_aggregates method
+- HOTFIX: handle data urls that have URL parts before 'aggregates/' in OneAlyx.list_aggregates method
 
 ## [2.5.1]
 
 ### Modified
 
-- exclude irrelevant s3 objects with source name in key, e.g. for foo/bar exclude foo/bar_baz/ key
+- HOTFIX: exclude irrelevant s3 objects with source name in key, e.g. for foo/bar exclude foo/bar_baz/ key
 
 ## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.5.0]
diff --git a/one/__init__.py b/one/__init__.py
index 06164ae1..c72fcb00 100644
--- a/one/__init__.py
+++ b/one/__init__.py
@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '2.5.3'
+__version__ = '2.5.4'
diff --git a/one/alf/cache.py b/one/alf/cache.py
index 586b9bc6..8034c826 100644
--- a/one/alf/cache.py
+++ b/one/alf/cache.py
@@ -31,7 +31,7 @@
 from one.alf.files import session_path_parts, get_alf_path
 from one.converters import session_record2path
 
-__all__ = ['make_parquet_db', 'remove_missing_datasets']
+__all__ = ['make_parquet_db', 'remove_missing_datasets', 'DATASETS_COLUMNS', 'SESSIONS_COLUMNS']
 _logger = logging.getLogger(__name__)
 
 # -------------------------------------------------------------------------------------------------
diff --git a/one/api.py b/one/api.py
index 6c7340b6..7b1d6b03 100644
--- a/one/api.py
+++ b/one/api.py
@@ -26,7 +26,7 @@
 import one.alf.io as alfio
 import one.alf.files as alfiles
 import one.alf.exceptions as alferr
-from .alf.cache import make_parquet_db
+from .alf.cache import make_parquet_db, DATASETS_COLUMNS, SESSIONS_COLUMNS
 from .alf.spec import is_uuid_string
 from . import __version__
 from one.converters import ConversionMixin, session_record2path
@@ -148,7 +148,9 @@ def load_cache(self, tables_dir=None, **kwargs):
             # No tables present
             meta['expired'] = True
             meta['raw'] = {}
-            self._cache.update({'datasets': pd.DataFrame(), 'sessions': pd.DataFrame()})
+            self._cache.update({
+                'datasets': pd.DataFrame(columns=DATASETS_COLUMNS).set_index(['eid', 'id']),
+                'sessions': pd.DataFrame(columns=SESSIONS_COLUMNS).set_index('id')})
             if self.offline:  # In online mode, the cache tables should be downloaded later
                 warnings.warn(f'No cache tables found in {self._tables_dir}')
         created = [datetime.fromisoformat(x['date_created'])
@@ -290,7 +292,7 @@ def _update_cache_from_records(self, strict=False, **kwargs):
                 to_drop = set(records.columns) - set(self._cache[table].columns)
                 records.drop(to_drop, axis=1, inplace=True)
             records = records.reindex(columns=self._cache[table].columns)
-            assert all(self._cache[table].columns == records.columns)
+            assert set(self._cache[table].columns) == set(records.columns)
             # Update existing rows
             to_update = records.index.isin(self._cache[table].index)
             self._cache[table].loc[records.index[to_update], :] = records[to_update]
diff --git a/one/tests/test_one.py b/one/tests/test_one.py
index 9565c961..3dac9436 100644
--- a/one/tests/test_one.py
+++ b/one/tests/test_one.py
@@ -796,6 +796,13 @@ def test_update_cache_from_records(self):
         with self.assertRaises(KeyError):
             self.one._update_cache_from_records(unknown=datasets)
         self.assertIsNone(self.one._update_cache_from_records(datasets=None))
+        # Absent cache table
+        self.one.load_cache(tables_dir='/foo')
+        self.one._update_cache_from_records(sessions=session, datasets=dataset)
+        self.assertTrue(all(self.one._cache.sessions == pd.DataFrame([session])))
+        self.assertEqual(1, len(self.one._cache.datasets))
+        self.assertEqual(self.one._cache.datasets.squeeze().name, dataset.name)
+        self.assertCountEqual(self.one._cache.datasets.squeeze().to_dict(), dataset.to_dict())
 
     def test_save_loaded_ids(self):
         """Test One.save_loaded_ids and logic within One._check_filesystem"""
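
For reviewers, a minimal standalone sketch of the behaviour this patch addresses, assuming only pandas. The column tuples below are illustrative stand-ins for the real `DATASETS_COLUMNS` and `SESSIONS_COLUMNS` constants exported from `one/alf/cache.py`: only the `id`/`eid` index fields are taken from the diff, the remaining field names are hypothetical.

```python
import pandas as pd

# Illustrative stand-ins for one.alf.cache.SESSIONS_COLUMNS / DATASETS_COLUMNS.
# Only 'id' and 'eid' (the index columns set in the patch) come from the diff;
# the other field names here are hypothetical placeholders.
SESSIONS_COLUMNS = ('id', 'subject', 'date', 'number')
DATASETS_COLUMNS = ('id', 'eid', 'rel_path', 'exists')

# Before the fix: tables created with a bare pd.DataFrame() have no columns
# and a default RangeIndex, so later updates keyed on 'id'/'eid' misalign.
before = pd.DataFrame()
assert before.columns.empty and before.index.name is None

# After the fix: empty tables carry the full schema and the expected index,
# so _update_cache_from_records can reindex and insert records cleanly.
sessions = pd.DataFrame(columns=SESSIONS_COLUMNS).set_index('id')
datasets = pd.DataFrame(columns=DATASETS_COLUMNS).set_index(['eid', 'id'])

# Inserting a record into the empty table now preserves the schema.
record = pd.DataFrame([{'id': 'abc', 'subject': 'SW001',
                        'date': '2024-01-17', 'number': 1}])
sessions = pd.concat([sessions, record.set_index('id')])
print(sessions.columns.tolist())  # ['subject', 'date', 'number'] (schema kept)
print(datasets.index.names)       # ['eid', 'id']
```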