From 278a3873e58b050824211ddb73775dbc03795d6a Mon Sep 17 00:00:00 2001 From: akharit <38331238+akharit@users.noreply.github.com> Date: Wed, 6 Mar 2019 15:12:06 -0800 Subject: [PATCH] Release 0.0.44 (#280) * Add continuation token to liststatus * Updated history and version --- HISTORY.rst | 5 +++++ azure/datalake/store/__init__.py | 2 +- azure/datalake/store/core.py | 16 +++++++++------- azure/datalake/store/lib.py | 4 ++-- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 7afb453..f1c924b 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,11 @@ Release History =============== +0.0.44 (2019-03-05) ++++++++++++++++++++ +* Add continuation token to LISTSTATUS api call +* Update api-version to 2018-09-01 + 0.0.43 (2019-03-01) +++++++++++++++++++ * Fix bug in downloader when glob returns a single file diff --git a/azure/datalake/store/__init__.py b/azure/datalake/store/__init__.py index 70cce79..e93c38f 100644 --- a/azure/datalake/store/__init__.py +++ b/azure/datalake/store/__init__.py @@ -6,7 +6,7 @@ # license information. # -------------------------------------------------------------------------- -__version__ = "0.0.43" +__version__ = "0.0.44" from .core import AzureDLFileSystem from .multithread import ADLDownloader diff --git a/azure/datalake/store/core.py b/azure/datalake/store/core.py index b498af2..1c6df22 100644 --- a/azure/datalake/store/core.py +++ b/azure/datalake/store/core.py @@ -53,7 +53,7 @@ class AzureDLFileSystem(object): url_suffix: str (None) Domain to send REST requests to. The end-point URL is constructed using this and the store_name. If None, use default. - api_version: str (2018-05-01) + api_version: str (2018-09-01) The API version to target with requests. Changing this value will change the behavior of the requests, and can cause unexpected behavior or breaking changes. Changes to this value should be undergone with caution. 
@@ -118,14 +118,16 @@ def _ls_batched(self, path, batch_size=4000): raise ValueError("Batch size must be strictly greater than 1") parms = {'listSize': batch_size} ret = [] - data = [None] + continuation_token = "NonEmptyStringSentinel" - while data: - data = self.azure.call('LISTSTATUS', path, **parms)['FileStatuses']['FileStatus'] + while continuation_token != "": + ls_call_result = self.azure.call('LISTSTATUS', path, **parms) + + data = ls_call_result['FileStatuses']['FileStatus'] ret.extend(data) - if len(data) < batch_size: - break - parms['listAfter'] = ret[-1]['pathSuffix'] # Last path to be used as ListAfter + + continuation_token = ls_call_result['FileStatuses']['continuationToken'] + parms['listAfter'] = continuation_token # continuationToken to be used as ListAfter return ret diff --git a/azure/datalake/store/lib.py b/azure/datalake/store/lib.py index f246c06..f164274 100644 --- a/azure/datalake/store/lib.py +++ b/azure/datalake/store/lib.py @@ -221,7 +221,7 @@ class DatalakeRESTInterface: url_suffix: str (None) Domain to send REST requests to. The end-point URL is constructed using this and the store_name. If None, use default. - api_version: str (2018-05-01) + api_version: str (2018-09-01) The API version to target with requests. Changing this value will change the behavior of the requests, and can cause unexpected behavior or breaking changes. Changes to this value should be undergone with caution. @@ -256,7 +256,7 @@ class DatalakeRESTInterface: } def __init__(self, store_name=default_store, token=None, - url_suffix=default_adls_suffix, api_version='2018-05-01', **kwargs): + url_suffix=default_adls_suffix, api_version='2018-09-01', **kwargs): # in the case where an empty string is passed for the url suffix, it must be replaced with the default. url_suffix = url_suffix or default_adls_suffix self.local = threading.local()