From 6d9b142257bebf6803ef3f4572ff569b65148f56 Mon Sep 17 00:00:00 2001 From: Patrick Quinn Date: Thu, 22 Apr 2021 11:26:51 -0400 Subject: [PATCH] HARMONY-829: Update to use latest Zarr version and add User-Agent --- README.rst | 6 ++---- eosdis_store/stores.py | 15 ++++++++++++--- presentation/requirements.txt | 2 ++ requirements.txt | 3 +-- 4 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 presentation/requirements.txt diff --git a/README.rst b/README.rst index bfaebca..3a9a7e6 100644 --- a/README.rst +++ b/README.rst @@ -120,10 +120,8 @@ To allow the technique to work with EOSDIS data, we have extended it and optimiz * The store caches redirect URLs for a period of time set by the Cache-Control header. Doing this avoids the overhead of repeated redirects when accessing parts of files. -* In addition to backward-compatible APIs, the store exposes a proposed API that allows it to make more efficient access - decisions. The ticket describing the API is available here: `<https://github.com/zarr-developers/zarr-python/issues/536>`_. - The store works without this implementation but is significantly faster with it, making the following optimizations: - +* The store uses a parallel API that allows it to make more efficient access optimizations: +* * When the Zarr library accesses data that requires reading multiple near-sequential bytes in the file, the store combines these smaller requests into a single larger request. 
diff --git a/eosdis_store/stores.py b/eosdis_store/stores.py index d49aa13..2c154ea 100644 --- a/eosdis_store/stores.py +++ b/eosdis_store/stores.py @@ -8,6 +8,7 @@ import xml.etree.ElementTree as ElementTree from .dmrpp import to_zarr +from .version import __version__ from zarr.storage import ConsolidatedMetadataStore logger = logging.getLogger(__name__) @@ -99,7 +100,10 @@ def _async_read(self, offset, size): """ logger.debug(f"Reading {self.url} [{offset}:{offset+size}] ({size} bytes)") range_str = '%d-%d' % (offset, offset + size) - request = self.session.get(self.url, headers={ 'Range': 'bytes=' + range_str }) + request = self.session.get(self.url, headers={ + 'Range': 'bytes=' + range_str, + 'User-Agent': f'zarr-eosdis-store/{__version__}' + }) if self.first_fetch: self.first_fetch = False request.result() @@ -132,9 +136,9 @@ def __getitem__(self, key): Returns: The data or metadata value of the item """ - return next(self.getitems((key, )))[1] + return self.getitems((key, ))[key] - def getitems(self, keys): + def getitems(self, keys, **kwargs): """Get values for the provided list of keys from the Zarr store Args: @@ -143,6 +147,11 @@ def getitems(self, keys): Returns: An iterator returning tuples of the input keys to their data or metadata values """ + return dict(self._getitems_generator(keys, **kwargs)) + + def _getitems_generator(self, keys, **kwargs): + """Generate results for getitems + """ ranges = [] for key in keys: if re.search(r'/\d+(\.\d+)*$', key): diff --git a/presentation/requirements.txt b/presentation/requirements.txt new file mode 100644 index 0000000..29abc32 --- /dev/null +++ b/presentation/requirements.txt @@ -0,0 +1,2 @@ +matplotlib>=3.4.1 +h5py>=3.2.1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8585712..b71162c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ CacheControl>=0.12.6 requests>=2.23.0 requests-futures>=1.0.0 -# Updates to Zarr to implement the getitems method 
proposed in https://github.com/zarr-developers/zarr-python/issues/536 -zarr @ git+https://github.com/bilts/zarr-python.git@getitems-prototype#egg=zarr +zarr>=2.7.1 ipypb~=0.5 xarray~=0.16