From d24e6307c82bc658c475715f226476dbed0dcb7f Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Fri, 14 Oct 2016 17:08:41 -0700 Subject: [PATCH] Adding public next_page() to Iterator. The previous implementation may catch users off guard since the iterator.page access may also update the value before access. In addition, this PR removed the _update_page() / next_page() subclass behavior in _BlobIterator. Over-riding that method was never intended. Instead makes a non-public class attribute _PAGE_CLASS that can be replaced with Page subclasses. This can be revisited if more implementations require custom behavior on Page creation / Page.__init__. --- core/google/cloud/iterator.py | 94 +++++++++++++++++----- core/unit_tests/test_iterator.py | 57 +++++++++++++ resource_manager/unit_tests/test_client.py | 1 + storage/google/cloud/storage/bucket.py | 41 ++++++---- storage/unit_tests/test_bucket.py | 3 + storage/unit_tests/test_client.py | 1 + system_tests/storage.py | 12 +-- 7 files changed, 164 insertions(+), 45 deletions(-) diff --git a/core/google/cloud/iterator.py b/core/google/cloud/iterator.py index 46107753c251..22566d8cc3b3 100644 --- a/core/google/cloud/iterator.py +++ b/core/google/cloud/iterator.py @@ -18,7 +18,7 @@ where the response is a list of results with a ``nextPageToken``. To make an iterator work, you may need to override the -``ITEMS_KEY`` class attribute so that given a response (containing a page of +``ITEMS_KEY`` class attribute so that a given response (containing a page of results) can be parsed into an iterable page of the actual objects you want:: class MyIterator(Iterator): @@ -71,6 +71,13 @@ def _item_to_value(self, item): manually:: >>> iterator = MyIterator(...) + >>> # No page of results before the iterator has started. + >>> iterator.page is None + True + >>> + >>> # Manually pull down the next page. + >>> iterator.next_page() # Returns "updated" status of page + True >>> items = list(iterator.page) >>> items [ @@ -84,18 +91,34 @@ def _item_to_value(self, item): 3 >>> iterator.next_page_token 'eav1OzQB0OM8rLdGXOEsyQWSG' - >>> # And just do the same thing to consume the next page. + >>> + >>> iterator.next_page() + True >>> list(iterator.page) [ , , ] + >>> + >>> # When there are no more results + >>> iterator.next_page() + True + >>> iterator.page is google.cloud.iterator.NO_MORE_PAGES + True """ import six +NO_MORE_PAGES = object() +"""Sentinel object indicating an iterator has no more pages.""" +_NO_MORE_PAGES_ERR = 'Iterator has no more pages.' +_PAGE_ERR_TEMPLATE = ( + 'Tried to get next_page() while current page (%r) still has %d ' + 'items remaining.') + + class Page(object): """Single page of results in an iterator. @@ -123,7 +146,7 @@ def num_items(self): """Total items in the page. :rtype: int - :returns: The number of items in this page of items. + :returns: The number of items in this page. """ return self._num_items @@ -132,7 +155,7 @@ def remaining(self): """Remaining items in the page. :rtype: int - :returns: The number of items remaining this page. + :returns: The number of items remaining in this page. """ return self._remaining @@ -141,7 +164,7 @@ def __iter__(self): return self def next(self): - """Get the next value in the iterator.""" + """Get the next value in the page.""" item = six.next(self._item_iter) result = self._parent._item_to_value(item) # Since we've successfully got the next value from the @@ -159,7 +182,7 @@ class Iterator(object): Sub-classes need to over-write :attr:`ITEMS_KEY` and to define :meth:`_item_to_value`. - :type client: :class:`google.cloud.client.Client` + :type client: :class:`~google.cloud.client.Client` :param client: The client, which owns a connection to make requests. :type page_token: str @@ -168,7 +191,7 @@ class Iterator(object): :type max_results: int :param max_results: (Optional) The maximum number of results to fetch. - :type extra_params: dict or None + :type extra_params: :class:`dict` or :data:`None` :param extra_params: Extra query string parameters for the API call. :type path: str @@ -181,6 +204,7 @@ class Iterator(object): PATH = None ITEMS_KEY = 'items' """The dictionary key used to retrieve items from each response.""" + _PAGE_CLASS = Page def __init__(self, client, page_token=None, max_results=None, extra_params=None, path=None): @@ -213,31 +237,54 @@ def page(self): :rtype: :class:`Page` :returns: The page of items that has been retrieved. """ - self._update_page() return self._page def __iter__(self): """The :class:`Iterator` is an iterator.""" return self - def _update_page(self): - """Update the current page if needed. + def next_page(self, require_empty=True): + """Move to the next page in the result set. + + If the current page is not empty and ``require_empty`` is :data:`True` + then an exception will be raised. If the current page is not empty + and ``require_empty`` is :data:`False`, then this will return + without updating the current page (and will return an ``updated`` + value of :data:`False`). + + If the current page **is** empty, but there are no more results, + sets the current page to :attr:`NO_MORE_PAGES`. + + If the current page is :attr:`NO_MORE_PAGES`, throws an exception. - Subclasses will need to implement this method if they - use data from the ``response`` other than the items. + :type require_empty: bool + :param require_empty: (Optional) Flag to indicate if the current page + must be empty before updating. :rtype: bool :returns: Flag indicated if the page was updated. - :raises: :class:`~exceptions.StopIteration` if there is no next page. + :raises ValueError: If ``require_empty`` is :data:`True` but the + current page is not empty. + :raises ValueError: If the current page is :attr:`NO_MORE_PAGES`. """ - if self._page is not None and self._page.remaining > 0: - return False - elif self.has_next_page(): - response = self._get_next_page_response() - self._page = Page(self, response, self.ITEMS_KEY) + if self._page is NO_MORE_PAGES: + raise ValueError(_NO_MORE_PAGES_ERR) + + # NOTE: This assumes Page.remaining can never go below 0. + page_empty = self._page is None or self._page.remaining == 0 + if page_empty: + if self.has_next_page(): + response = self._get_next_page_response() + self._page = self._PAGE_CLASS(self, response, self.ITEMS_KEY) + else: + self._page = NO_MORE_PAGES + return True else: - raise StopIteration + if require_empty: + msg = _PAGE_ERR_TEMPLATE % (self._page, self.page.remaining) + raise ValueError(msg) + return False def _item_to_value(self, item): """Get the next item in the page. @@ -252,7 +299,10 @@ def _item_to_value(self, item): raise NotImplementedError def next(self): - """Get the next value in the iterator.""" + """Get the next item from the request.""" + self.next_page(require_empty=False) + if self.page is NO_MORE_PAGES: + raise StopIteration item = six.next(self.page) self.num_results += 1 return item @@ -261,10 +311,10 @@ def next(self): __next__ = next def has_next_page(self): - """Determines whether or not this iterator has more pages. + """Determines whether or not there are more pages with results. :rtype: boolean - :returns: Whether the iterator has more pages or not. + :returns: Whether the iterator has more pages. """ if self.page_number == 0: return True diff --git a/core/unit_tests/test_iterator.py b/core/unit_tests/test_iterator.py index d788bcc486e7..0f979860147e 100644 --- a/core/unit_tests/test_iterator.py +++ b/core/unit_tests/test_iterator.py @@ -121,6 +121,63 @@ def test_constructor_w_extra_param_collision(self): with self.assertRaises(ValueError): self._makeOne(client, path=path, extra_params=extra_params) + def test_next_page_no_more(self): + from google.cloud.iterator import NO_MORE_PAGES + + iterator = self._makeOne(None) + iterator._page = NO_MORE_PAGES + with self.assertRaises(ValueError): + iterator.next_page() + + def test_next_page_not_empty_success(self): + from google.cloud.iterator import Page + + iterator = self._makeOne(None) + iterator._page = Page(None, {}, '') + iterator._page._remaining = 1 + updated = iterator.next_page(require_empty=False) + self.assertFalse(updated) + + def test_next_page_not_empty_fail(self): + from google.cloud.iterator import Page + + iterator = self._makeOne(None) + iterator._page = Page(None, {}, '') + iterator._page._remaining = 1 + with self.assertRaises(ValueError): + iterator.next_page(require_empty=True) + + def test_next_page_empty_then_no_more(self): + from google.cloud.iterator import NO_MORE_PAGES + + iterator = self._makeOne(None) + # Fake that there are no more pages. + iterator.page_number = 1 + iterator.next_page_token = None + updated = iterator.next_page() + self.assertTrue(updated) + self.assertIs(iterator.page, NO_MORE_PAGES) + + def test_next_page_empty_then_another(self): + iterator = self._makeOne(None) + # Fake the next page class. + fake_page = object() + page_args = [] + + def dummy_response(): + return {} + + def dummy_page_class(*args): + page_args.append(args) + return fake_page + + iterator._get_next_page_response = dummy_response + iterator._PAGE_CLASS = dummy_page_class + updated = iterator.next_page() + self.assertTrue(updated) + self.assertIs(iterator.page, fake_page) + self.assertEqual(page_args, [(iterator, {}, iterator.ITEMS_KEY)]) + def test___iter__(self): iterator = self._makeOne(None, None) self.assertIs(iter(iterator), iterator) diff --git a/resource_manager/unit_tests/test_client.py b/resource_manager/unit_tests/test_client.py index 8eb58e730f07..a6c35c19660d 100644 --- a/resource_manager/unit_tests/test_client.py +++ b/resource_manager/unit_tests/test_client.py @@ -68,6 +68,7 @@ def dummy_response(): iterator = self._makeOne(client) iterator._get_next_page_response = dummy_response + iterator.next_page() page = iterator.page self.assertEqual(page.num_items, 1) project = iterator.next() diff --git a/storage/google/cloud/storage/bucket.py b/storage/google/cloud/storage/bucket.py index e1b04ea224cd..bf06faa540f0 100644 --- a/storage/google/cloud/storage/bucket.py +++ b/storage/google/cloud/storage/bucket.py @@ -20,6 +20,7 @@ from google.cloud._helpers import _rfc3339_to_datetime from google.cloud.exceptions import NotFound +from google.cloud.iterator import Page from google.cloud.iterator import Iterator from google.cloud.storage._helpers import _PropertyMixin from google.cloud.storage._helpers import _scalar_property @@ -28,6 +29,27 @@ from google.cloud.storage.blob import Blob +class _BlobPage(Page): + """Iterator for a single page of results. + + :type parent: :class:`_BlobIterator` + :param parent: The iterator that owns the current page. + + :type response: dict + :param response: The JSON API response for a page of blobs. + + :type items_key: str + :param items_key: The dictionary key used to retrieve items + from the response. + """ + + def __init__(self, parent, response, items_key): + super(_BlobPage, self).__init__(parent, response, items_key) + # Grab the prefixes from the response. + self.prefixes = tuple(response.get('prefixes', ())) + parent.prefixes.update(self.prefixes) + + class _BlobIterator(Iterator): """An iterator listing blobs in a bucket @@ -51,6 +73,8 @@ class _BlobIterator(Iterator): Defaults to the bucket's client. """ + _PAGE_CLASS = _BlobPage + def __init__(self, bucket, page_token=None, max_results=None, extra_params=None, client=None): if client is None: @@ -76,23 +100,6 @@ def _item_to_value(self, item): blob._set_properties(item) return blob - def _update_page(self): - """Update the current page if needed. - - If the current page was updated, also updates the cumulative - prefixes list on this iterator and sets the local prefixes on - the page. - - :rtype: bool - :returns: Flag indicated if the page was updated. - """ - updated = super(_BlobIterator, self)._update_page() - if updated: - prefixes = tuple(self._page.response.get('prefixes', ())) - self._page.prefixes = prefixes - self.prefixes.update(prefixes) - return updated - class Bucket(_PropertyMixin): """A class representing a Bucket on Cloud Storage. diff --git a/storage/unit_tests/test_bucket.py b/storage/unit_tests/test_bucket.py index 9584d6d506b7..d622bd101360 100644 --- a/storage/unit_tests/test_bucket.py +++ b/storage/unit_tests/test_bucket.py @@ -64,6 +64,7 @@ def dummy_response(): iterator = self._makeOne(bucket, client=client) iterator._get_next_page_response = dummy_response + iterator.next_page() page = iterator.page self.assertEqual(page.prefixes, ('foo',)) self.assertEqual(page.num_items, 1) @@ -97,6 +98,7 @@ def dummy_response(): iterator._get_next_page_response = dummy_response # Parse first response. + iterator.next_page() page1 = iterator.page self.assertEqual(page1.prefixes, ('foo',)) self.assertEqual(page1.num_items, 1) @@ -106,6 +108,7 @@ def dummy_response(): self.assertEqual(blob.name, BLOB_NAME) self.assertEqual(iterator.prefixes, set(['foo'])) # Parse second response. + iterator.next_page() page2 = iterator.page self.assertEqual(page2.prefixes, ('bar',)) self.assertEqual(page2.num_items, 0) diff --git a/storage/unit_tests/test_client.py b/storage/unit_tests/test_client.py index c2b2b79274de..f7f609b03a2a 100644 --- a/storage/unit_tests/test_client.py +++ b/storage/unit_tests/test_client.py @@ -412,6 +412,7 @@ def dummy_response(): iterator = self._makeOne(client) iterator._get_next_page_response = dummy_response + iterator.next_page() page = iterator.page self.assertEqual(page.num_items, 1) bucket = iterator.next() diff --git a/system_tests/storage.py b/system_tests/storage.py index 75faf6faf633..ecd96e389600 100644 --- a/system_tests/storage.py +++ b/system_tests/storage.py @@ -257,12 +257,12 @@ def test_paginate_files(self): truncation_size = 1 count = len(self.FILENAMES) - truncation_size iterator = self.bucket.list_blobs(max_results=count) - iterator._update_page() + iterator.next_page() blobs = list(iterator.page) self.assertEqual(len(blobs), count) self.assertIsNotNone(iterator.next_page_token) - iterator._update_page() + iterator.next_page() last_blobs = list(iterator.page) self.assertEqual(len(last_blobs), truncation_size) @@ -301,7 +301,7 @@ def tearDownClass(cls): @RetryErrors(unittest.TestCase.failureException) def test_root_level_w_delimiter(self): iterator = self.bucket.list_blobs(delimiter='/') - iterator._update_page() + iterator.next_page() blobs = list(iterator.page) self.assertEqual([blob.name for blob in blobs], ['file01.txt']) self.assertIsNone(iterator.next_page_token) @@ -310,7 +310,7 @@ def test_root_level_w_delimiter(self): @RetryErrors(unittest.TestCase.failureException) def test_first_level(self): iterator = self.bucket.list_blobs(delimiter='/', prefix='parent/') - iterator._update_page() + iterator.next_page() blobs = list(iterator.page) self.assertEqual([blob.name for blob in blobs], ['parent/file11.txt']) self.assertIsNone(iterator.next_page_token) @@ -325,7 +325,7 @@ def test_second_level(self): iterator = self.bucket.list_blobs(delimiter='/', prefix='parent/child/') - iterator._update_page() + iterator.next_page() blobs = list(iterator.page) self.assertEqual([blob.name for blob in blobs], expected_names) @@ -341,7 +341,7 @@ def test_third_level(self): # Exercise a layer deeper to illustrate this. iterator = self.bucket.list_blobs(delimiter='/', prefix='parent/child/grand/') - iterator._update_page() + iterator.next_page() blobs = list(iterator.page) self.assertEqual([blob.name for blob in blobs], ['parent/child/grand/file31.txt'])