From 5394651bf12dc7c24b2e9c4666e44476c6371b37 Mon Sep 17 00:00:00 2001
From: mayofaulkner <mayo.faulkner@ucl.ac.uk>
Date: Tue, 24 Sep 2024 11:10:28 +0100
Subject: [PATCH] account for public being in rel_path (#127)

* account for public being in rel_path
* Slightly more generalized version

---------

Co-authored-by: Miles Wells <k1o0@5tk.co>
---
 CHANGELOG.md             |  2 ++
 one/api.py               | 12 ++++++------
 one/params.py            |  6 +++---
 one/tests/test_one.py    |  5 +++--
 one/tests/test_params.py |  2 +-
 5 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 67ccc1a9..228b9cbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@
 
 - HOTFIX: When downloading cache only authenticate Alyx when necessary
 - HOTFIX: Ensure http data server URL does not end in slash
+- HOTFIX: Handle public aggregate dataset relative paths
+- HOTFIX: No longer warns in silent mode when no param conflicts present
 - Explicit kwargs in load_* methods to avoid user confusion (e.g. no 'namespace' kwarg for `load_dataset`)
 
 ## [2.9.0]
diff --git a/one/api.py b/one/api.py
index 9677bba1..2e39dd2d 100644
--- a/one/api.py
+++ b/one/api.py
@@ -1827,13 +1827,13 @@ def list_aggregates(self, relation: str, identifier: str = None,
                    .reset_index(level=0)
                    .drop('eid', axis=1)
                    .rename_axis(index={'id': 'did'}))
-        # Since rel_path for public FI file records starts with public/aggregates instead of just
-        # aggregates as is the case for internal FI file records, as well as public and internal
-        # AWS file records, we need to make sure to use the file path part after 'aggregates'
-        # and not simply the second part, as relation.
+        # Since rel_path for public FI file records starts with 'public/aggregates' instead of just
+        # 'aggregates', we should discard the file path parts before 'aggregates' (if present)
+        records['rel_path'] = records['rel_path'].str.replace(
+            r'^[\w\/]+(?=aggregates\/)', '', n=1, regex=True)
+        # The relation is the first part after 'aggregates', i.e. the second part
         records['relation'] = records['rel_path'].map(
-            lambda x: x.split('aggregates')[-1].split('/')[1].lower()
-        )
+            lambda x: x.split('aggregates')[-1].split('/')[1].lower())
         records = records[records['relation'] == relation.lower()]
 
         def path2id(p) -> str:
diff --git a/one/params.py b/one/params.py
index 1289b34f..47409f9e 100644
--- a/one/params.py
+++ b/one/params.py
@@ -182,12 +182,12 @@ def setup(client=None, silent=False, make_default=None, username=None, cache_dir
             print('SETUP ABANDONED.  Please re-run.')
             return par_current
     else:
-        par = par_current
-        if any(v for k, v in cache_map.CLIENT_MAP.items() if k != client_key):
-            warnings.warn('Warning: the directory provided is already a cache for another URL.')
         # Precedence: user provided cache_dir; previously defined; the default location
         default_cache_dir = Path(CACHE_DIR_DEFAULT, client_key)
         cache_dir = cache_dir or cache_map.CLIENT_MAP.get(client_key, default_cache_dir)
+        par = par_current
+        if any(v for k, v in cache_map.CLIENT_MAP.items() if k != client_key and v == cache_dir):
+            warnings.warn('Warning: the directory provided is already a cache for another URL.')
 
     # Update and save parameters
     Path(cache_dir).mkdir(exist_ok=True, parents=True)
diff --git a/one/tests/test_one.py b/one/tests/test_one.py
index 530679c4..10aa9b87 100644
--- a/one/tests/test_one.py
+++ b/one/tests/test_one.py
@@ -1249,14 +1249,15 @@ def test_list_aggregates(self):
         """Test OneAlyx.list_aggregates"""
         # Test listing by relation
         datasets = self.one.list_aggregates('subjects')
-        self.assertTrue(all(datasets['rel_path'].str.startswith('cortexlab/Subjects')))
+        self.assertTrue(all(datasets['rel_path'].str.startswith('aggregates/Subjects')))
         self.assertIn('exists_aws', datasets.columns)
         self.assertIn('session_path', datasets.columns)
         self.assertTrue(all(datasets['session_path'] == ''))
         self.assertTrue(self.one.list_aggregates('foobar').empty)
         # Test filtering with an identifier
         datasets = self.one.list_aggregates('subjects', 'ZM_1085')
-        self.assertTrue(all(datasets['rel_path'].str.startswith('cortexlab/Subjects/ZM_1085')))
+        expected = 'aggregates/Subjects/mainenlab/ZM_1085'
+        self.assertTrue(all(datasets['rel_path'].str.startswith(expected)))
         self.assertTrue(self.one.list_aggregates('subjects', 'foobar').empty)
         # Test that additional parts of data path are correctly removed
         # specifically /public in FI openalyx file rec
diff --git a/one/tests/test_params.py b/one/tests/test_params.py
index e38958b2..0ae61079 100644
--- a/one/tests/test_params.py
+++ b/one/tests/test_params.py
@@ -61,7 +61,7 @@ def test_setup(self, _):
             self.assertNotEqual(cache.ALYX_LOGIN, 'mistake')
 
         # Check that raises ValueError when bad URL provided
-        self.url = 'ftp://'
+        self.url = 'ftp://foo.bar.org'
         with self.assertRaises(ValueError), mock.patch('one.params.input', new=self._mock_input):
             one.params.setup()