
Commit 22f2972
Merge pull request #106 from int-brain-lab/olive
Olive
k1o0 authored Jan 24, 2024
2 parents 9377e7f + 647123f commit 22f2972
Showing 7 changed files with 59 additions and 9 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
```diff
@@ -1,5 +1,13 @@
 # Changelog
-## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.5.5]
+## [Latest](https://github.com/int-brain-lab/ONE/commits/main) [2.6.0]
+
+### Modified
+- `one.load_dataset`
+  - add an option to skip computing the hash for existing files when loading datasets (`check_hash=False`)
+  - check the file size before computing the hash, for performance
+
+
+## [2.5.5]
 
 ### Modified
 
```
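As context for the changelog entry, a minimal usage sketch of the new option; the session ID and dataset name below are placeholders, and only the `check_hash=False` keyword itself is established by this commit:

```python
from one.api import ONE

one = ONE()
# Placeholder eid and dataset name, for illustration only
eid = 'aad23144-0e52-4eac-80c5-c4ee2decb198'
# Skip the md5 check for files already present on disk
spikes_times = one.load_dataset(eid, 'spikes.times.npy', check_hash=False)
```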
2 changes: 1 addition & 1 deletion one/__init__.py
```diff
@@ -1,2 +1,2 @@
 """The Open Neurophysiology Environment (ONE) API."""
-__version__ = '2.5.5'
+__version__ = '2.6.0'
```
12 changes: 6 additions & 6 deletions one/api.py
```diff
@@ -525,7 +525,7 @@ def sort_fcn(itm):
         else:
             return eids
 
-    def _check_filesystem(self, datasets, offline=None, update_exists=True):
+    def _check_filesystem(self, datasets, offline=None, update_exists=True, check_hash=True):
         """Update the local filesystem for the given datasets.
 
         Given a set of datasets, check whether records correctly reflect the filesystem.
@@ -581,15 +581,15 @@ def _check_filesystem(self, datasets, offline=None, update_exists=True):
             if file.exists():
                 # Check if there's a hash mismatch
                 # If so, add this index to list of datasets that need downloading
-                if rec['hash'] is not None:
+                if rec['file_size'] and file.stat().st_size != rec['file_size']:
+                    _logger.warning('local file size mismatch on dataset: %s',
+                                    PurePosixPath(rec.session_path, rec.rel_path))
+                    indices_to_download.append(i)
+                elif check_hash and rec['hash'] is not None:
                     if hashfile.md5(file) != rec['hash']:
                         _logger.warning('local md5 mismatch on dataset: %s',
                                         PurePosixPath(rec.session_path, rec.rel_path))
                         indices_to_download.append(i)
-                elif rec['file_size'] and file.stat().st_size != rec['file_size']:
-                    _logger.warning('local file size mismatch on dataset: %s',
-                                    PurePosixPath(rec.session_path, rec.rel_path))
-                    indices_to_download.append(i)
                 files.append(file)  # File exists so add to file list
             else:
                 # File doesn't exist so add None to output file list
```
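The reordering above runs the cheap size comparison (a single `stat` call) before the expensive md5 (which reads the whole file), and lets callers skip hashing entirely. A standalone sketch of that logic, with illustrative names rather than the actual `_check_filesystem` implementation:

```python
import hashlib
from pathlib import Path

def needs_download(file: Path, expected_size, expected_hash, check_hash=True):
    """Illustrative helper: decide whether a cached file looks stale."""
    # Cheap check first: one stat call, no file reads
    if expected_size and file.stat().st_size != expected_size:
        return True
    # Expensive check second, and only when requested
    if check_hash and expected_hash is not None:
        return hashlib.md5(file.read_bytes()).hexdigest() != expected_hash
    return False
```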
17 changes: 17 additions & 0 deletions one/remote/aws.py
```diff
@@ -168,6 +168,23 @@ def get_s3_public():
     return s3, S3_BUCKET_IBL
 
 
+def get_s3_allen():
+    """
+    Retrieve the Allen public S3 service resource.
+
+    Returns
+    -------
+    s3.ServiceResource
+        An S3 ServiceResource instance for the public Allen bucket.
+    str
+        The name of the S3 bucket.
+    """
+    S3_BUCKET_ALLEN = 'allen-brain-cell-atlas'
+    session = boto3.Session(region_name='us-west-2')
+    s3 = session.resource('s3', config=Config(signature_version=UNSIGNED))
+    return s3, S3_BUCKET_ALLEN
+
+
 def get_s3_from_alyx(alyx, repo_name=REPO_DEFAULT):
     """
     Create an S3 resource instance using credentials from an Alyx data repository.
```
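A possible usage sketch for the new helper, using the standard boto3 resource API; the listing loop is illustrative and iterates a large public bucket, hence the early break:

```python
from one.remote.aws import get_s3_allen

s3, bucket_name = get_s3_allen()  # anonymous, unsigned access
bucket = s3.Bucket(bucket_name)
# Print the first five object keys in the public Allen bucket
for i, obj in enumerate(bucket.objects.all()):
    print(obj.key)
    if i == 4:
        break
```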
14 changes: 14 additions & 0 deletions one/tests/remote/test_aws.py
```diff
@@ -143,6 +143,20 @@ def test_url2uri(self):
         uri, loc = aws.url2uri(url, return_location=True)
         self.assertEqual(loc, 'eu-east-1')
 
+    @mock.patch('boto3.Session')
+    def test_get_ibl_s3(self, session_mock):
+        s3, bucket = aws.get_s3_public()
+        resource = session_mock().resource
+        self.assertIs(s3, resource())
+        self.assertEqual(bucket, 'ibl-brain-wide-map-public')
+
+    @mock.patch('boto3.Session')
+    def test_get_allen_s3(self, session_mock):
+        s3, bucket = aws.get_s3_allen()
+        resource = session_mock().resource
+        self.assertIs(s3, resource())
+        self.assertEqual(bucket, 'allen-brain-cell-atlas')
+
 
 if __name__ == '__main__':
     unittest.main(exit=False)
```
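Both tests lean on the fact that a `MagicMock` caches its return values, so the resource created inside the patched helper is the very same object the test rebuilds via `session_mock().resource()`. A minimal demonstration of that property:

```python
from unittest import mock

m = mock.MagicMock()
# Repeated calls return the same cached child mock, so identity checks hold
assert m() is m()
assert m().resource() is m().resource()
```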
10 changes: 10 additions & 0 deletions one/tests/test_one.py
```diff
@@ -68,6 +68,16 @@ def setUp(self) -> None:
         self.one = ONE(mode='local', cache_dir=self.tempdir.name)
         # Create dset files from cache
         util.create_file_tree(self.one)
+        # Here we create some variations to get coverage over more cases:
+        # the first 10 records have the right file size (0) but the wrong hash;
+        # the next 10 records have the right file size (0) and the correct empty-file md5;
+        # all remaining records have NaN in file_size and None in hash (default cache table)
+        cols = self.one._cache['datasets'].columns
+        self.one._cache['datasets'].iloc[:20, cols.get_loc('file_size')] = 0
+        self.one._cache['datasets'].iloc[:20, cols.get_loc('hash')]\
+            = 'd41d8cd98f00b204e9800998ecf8427e'  # correct md5 of an empty file
+        self.one._cache['datasets'].iloc[:10, cols.get_loc('hash')]\
+            = 'd41d8cda454aaaa4e9800998ecf8497e'  # deliberately wrong hash
 
     def tearDown(self) -> None:
         while Path(self.one.cache_dir).joinpath('.cache.lock').exists():
```
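The "correct" hash in this fixture is the md5 of zero bytes, matching the empty files that `util.create_file_tree` writes; a quick check:

```python
import hashlib

# md5 of an empty byte string, i.e. the hash of any zero-byte file
print(hashlib.md5(b'').hexdigest())  # d41d8cd98f00b204e9800998ecf8427e
```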
3 changes: 2 additions & 1 deletion one/webclient.py
```diff
@@ -610,11 +610,12 @@ def _generic_request(self, reqfunction, rest_query, data=None, files=None):
             self.authenticate(username=username, force=True)
             return self._generic_request(reqfunction, rest_query, data=data, files=files)
         else:
-            _logger.debug('Response text: ' + r.text)
+            _logger.debug('Response text raw: ' + r.text)
             try:
                 message = json.loads(r.text)
                 message.pop('status_code', None)  # Get status code from response object instead
                 message = message.get('detail') or message  # Get details if available
+                _logger.debug(message)
             except json.decoder.JSONDecodeError:
                 message = r.text
             raise requests.HTTPError(r.status_code, rest_query, message, response=r)
```
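A standalone sketch of the message extraction performed in the block above (mirroring the logic, not the actual `AlyxClient` code):

```python
import json

def parse_error(text):
    """Extract a human-readable message from an error response body."""
    try:
        message = json.loads(text)
        message.pop('status_code', None)  # status code comes from the response object
        return message.get('detail') or message
    except json.JSONDecodeError:
        return text  # not JSON; fall back to the raw body

print(parse_error('{"detail": "Not found.", "status_code": 404}'))  # Not found.
print(parse_error('<html>Server Error</html>'))
```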
