Merge pull request googleapis#2545 from dhermes/revamp-iterator-2

Moving backend specific behavior from Page to Iterator.
GPCsolutions · Oct 17, 2016 · 0aca3f6 · 0aca3f6
2 parents a8b0c19 + 8226323
commit 0aca3f6
Show file tree

Hide file tree

Showing 9 changed files with 392 additions and 280 deletions.
diff --git a/core/google/cloud/iterator.py b/core/google/cloud/iterator.py
@@ -17,22 +17,19 @@
 These iterators simplify the process of paging through API responses
 where the response is a list of results with a ``nextPageToken``.
 
-To make an iterator work, just override the ``PAGE_CLASS`` class
-attribute so that given a response (containing a page of results) can
-be parsed into an iterable page of the actual objects you want::
+To make an iterator work, you may need to override the
+``ITEMS_KEY`` class attribute so that a given response (containing a page of
+results) can be parsed into an iterable page of the actual objects you want::
 
-  class MyPage(Page):
+  class MyIterator(Iterator):
+
+      ITEMS_KEY = 'blocks'
 
       def _item_to_value(self, item):
           my_item = MyItemClass(other_arg=True)
           my_item._set_properties(item)
           return my_item
 
-
-  class MyIterator(Iterator):
-
-      PAGE_CLASS = MyPage
-
 You then can use this to get **all** the results from a resource::
 
     >>> iterator = MyIterator(...)
@@ -69,12 +66,55 @@ class MyIterator(Iterator):
     2
     >>> iterator.page.remaining
     19
+
+It's also possible to consume an entire page and handle the paging process
+manually::
+
+    >>> iterator = MyIterator(...)
+    >>> # Manually pull down the first page.
+    >>> iterator.update_page()
+    >>> items = list(iterator.page)
+    >>> items
+    [
+        <MyItemClass at 0x7fd64a098ad0>,
+        <MyItemClass at 0x7fd64a098ed0>,
+        <MyItemClass at 0x7fd64a098e90>,
+    ]
+    >>> iterator.page.remaining
+    0
+    >>> iterator.page.num_items
+    3
+    >>> iterator.next_page_token
+    'eav1OzQB0OM8rLdGXOEsyQWSG'
+    >>>
+    >>> # Ask for the next page to be grabbed.
+    >>> iterator.update_page()
+    >>> list(iterator.page)
+    [
+        <MyItemClass at 0x7fea740abdd0>,
+        <MyItemClass at 0x7fea740abe50>,
+    ]
+    >>>
+    >>> # When there are no more results
+    >>> iterator.update_page()
+    >>> iterator.page is None
+    True
 """
 
 
 import six
 
 
+_UNSET = object()
+_NO_MORE_PAGES_ERR = 'Iterator has no more pages.'
+_UNSTARTED_ERR = (
+    'Iterator has not been started. Either begin iterating, '
+    'call next(my_iter) or call my_iter.update_page().')
+_PAGE_ERR_TEMPLATE = (
+    'Tried to update the page while current page (%r) still has %d '
+    'items remaining.')
+
+
 class Page(object):
     """Single page of results in an iterator.
 
@@ -83,23 +123,26 @@ class Page(object):
 
     :type response: dict
     :param response: The JSON API response for a page.
-    """
 
-    ITEMS_KEY = 'items'
+    :type items_key: str
+    :param items_key: The dictionary key used to retrieve items
+                      from the response.
+    """
 
-    def __init__(self, parent, response):
+    def __init__(self, parent, response, items_key):
         self._parent = parent
-        items = response.get(self.ITEMS_KEY, ())
+        items = response.get(items_key, ())
         self._num_items = len(items)
         self._remaining = self._num_items
         self._item_iter = iter(items)
+        self.response = response
 
     @property
     def num_items(self):
         """Total items in the page.
 
         :rtype: int
-        :returns: The number of items in this page of items.
+        :returns: The number of items in this page.
         """
         return self._num_items
 
@@ -108,31 +151,18 @@ def remaining(self):
         """Remaining items in the page.
 
         :rtype: int
-        :returns: The number of items remaining this page.
+        :returns: The number of items remaining in this page.
         """
         return self._remaining
 
     def __iter__(self):
         """The :class:`Page` is an iterator."""
         return self
 
-    def _item_to_value(self, item):
-        """Get the next item in the page.
-
-        This method (along with the constructor) is the workhorse
-        of this class. Subclasses will need to implement this method.
-
-        :type item: dict
-        :param item: An item to be converted to a native object.
-
-        :raises NotImplementedError: Always
-        """
-        raise NotImplementedError
-
     def next(self):
-        """Get the next value in the iterator."""
+        """Get the next value in the page."""
         item = six.next(self._item_iter)
-        result = self._item_to_value(item)
+        result = self._parent._item_to_value(item)
         # Since we've successfully got the next value from the
         # iterator, we update the number of remaining.
         self._remaining -= 1
@@ -145,9 +175,10 @@ def next(self):
 class Iterator(object):
     """A generic class for iterating through Cloud JSON APIs list responses.
 
-    Sub-classes need to over-write ``PAGE_CLASS``.
+    Sub-classes may need to override :attr:`ITEMS_KEY` and must define
+    :meth:`_item_to_value`.
 
-    :type client: :class:`google.cloud.client.Client`
+    :type client: :class:`~google.cloud.client.Client`
     :param client: The client, which owns a connection to make requests.
 
     :type page_token: str
@@ -156,18 +187,22 @@ class Iterator(object):
     :type max_results: int
     :param max_results: (Optional) The maximum number of results to fetch.
 
-    :type extra_params: dict or None
-    :param extra_params: Extra query string parameters for the API call.
+    :type extra_params: dict
+    :param extra_params: (Optional) Extra query string parameters for the
+                         API call.
 
     :type path: str
-    :param path: The path to query for the list of items.
+    :param path: (Optional) The path to query for the list of items. Defaults
+                 to :attr:`PATH` on the current iterator class.
     """
 
     PAGE_TOKEN = 'pageToken'
     MAX_RESULTS = 'maxResults'
     RESERVED_PARAMS = frozenset([PAGE_TOKEN, MAX_RESULTS])
-    PAGE_CLASS = Page
     PATH = None
+    ITEMS_KEY = 'items'
+    """The dictionary key used to retrieve items from each response."""
+    _PAGE_CLASS = Page
 
     def __init__(self, client, page_token=None, max_results=None,
                  extra_params=None, path=None):
@@ -180,7 +215,7 @@ def __init__(self, client, page_token=None, max_results=None,
         self.page_number = 0
         self.next_page_token = page_token
         self.num_results = 0
-        self._page = None
+        self._page = _UNSET
 
     def _verify_params(self):
         """Verifies the parameters don't use any reserved parameter.
@@ -197,46 +232,86 @@ def _verify_params(self):
     def page(self):
         """The current page of results that has been retrieved.
 
+        If there are no more results, will return :data:`None`.
+
         :rtype: :class:`Page`
         :returns: The page of items that has been retrieved.
+        :raises AttributeError: If the page has not been set.
         """
+        if self._page is _UNSET:
+            raise AttributeError(_UNSTARTED_ERR)
         return self._page
 
     def __iter__(self):
         """The :class:`Iterator` is an iterator."""
         return self
 
-    def _update_page(self):
-        """Replace the current page.
+    def update_page(self, require_empty=True):
+        """Move to the next page in the result set.
 
-        Does nothing if the current page is non-null and has items
-        remaining.
+        If the current page is not empty and ``require_empty`` is :data:`True`
+        then an exception will be raised. If the current page is not empty
+        and ``require_empty`` is :data:`False`, then this will return
+        without updating the current page.
 
-        :raises: :class:`~exceptions.StopIteration` if there is no next page.
+        If the current page **is** empty, but there are no more results,
+        sets the current page to :data:`None`.
+
+        If the page is already :data:`None` (exhausted), throws an exception.
+
+        :type require_empty: bool
+        :param require_empty: (Optional) Flag to indicate if the current page
+                              must be empty before updating.
+
+        :raises ValueError: If ``require_empty`` is :data:`True` but the
+                            current page is not empty.
+        :raises ValueError: If there are no more pages.
         """
-        if self.page is not None and self.page.remaining > 0:
-            return
-        if self.has_next_page():
-            response = self._get_next_page_response()
-            self._page = self.PAGE_CLASS(self, response)
+        if self._page is None:
+            raise ValueError(_NO_MORE_PAGES_ERR)
+
+        # NOTE: This assumes Page.remaining can never go below 0.
+        page_empty = self._page is _UNSET or self._page.remaining == 0
+        if page_empty:
+            if self._has_next_page():
+                response = self._get_next_page_response()
+                self._page = self._PAGE_CLASS(self, response, self.ITEMS_KEY)
+            else:
+                self._page = None
         else:
-            raise StopIteration
+            if require_empty:
+                msg = _PAGE_ERR_TEMPLATE % (self._page, self.page.remaining)
+                raise ValueError(msg)
+
+    def _item_to_value(self, item):
+        """Convert a raw item from a page into a native object.
+
+        Subclasses will need to implement this method.
+
+        :type item: dict
+        :param item: An item to be converted to a native object.
+
+        :raises NotImplementedError: Always
+        """
+        raise NotImplementedError
 
     def next(self):
-        """Get the next value in the iterator."""
-        self._update_page()
+        """Get the next item from the request."""
+        self.update_page(require_empty=False)
+        if self.page is None:
+            raise StopIteration
         item = six.next(self.page)
         self.num_results += 1
         return item
 
     # Alias needed for Python 2/3 support.
     __next__ = next
 
-    def has_next_page(self):
-        """Determines whether or not this iterator has more pages.
+    def _has_next_page(self):
+        """Determines whether or not there are more pages with results.
 
         :rtype: boolean
-        :returns: Whether the iterator has more pages or not.
+        :returns: Whether the iterator has more pages.
         """
         if self.page_number == 0:
             return True
@@ -281,4 +356,4 @@ def reset(self):
         self.page_number = 0
         self.next_page_token = None
         self.num_results = 0
-        self._page = None
+        self._page = _UNSET