From 78bf06bd2d2fb63a7b7108101b63eaa284f05790 Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Wed, 10 Aug 2022 15:28:46 +0200 Subject: [PATCH 1/7] remove references to cloud hosting --- aleph_alpha_client/aleph_alpha_client.py | 51 +++++++++++++++--------- aleph_alpha_client/aleph_alpha_model.py | 4 +- tests/test_qa.py | 1 - 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 66c6c87..8457212 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -115,7 +115,7 @@ def complete( self, model: str, prompt: Union[str, List[Union[str, ImagePrompt]]] = "", - hosting: str = "cloud", + hosting: Optional[str] = None, maximum_tokens: Optional[int] = 64, temperature: Optional[float] = 0.0, top_k: Optional[int] = 0, @@ -145,8 +145,8 @@ def complete( prompt (str, optional, default ""): The text to be completed. Unconditional completion can be started with an empty string (default). The prompt may contain a zero shot or few shot task. - hosting (str, optional, default "cloud"): - Specifies where the computation will take place. This defaults to "cloud", meaning that it can be + hosting (str, optional, default None): + Specifies where the computation will take place. This defaults to None, meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. Check available_models() for available hostings. 
@@ -255,7 +255,6 @@ def complete( payload = { "model": model, "prompt": _to_serializable_prompt(prompt=prompt), - "hosting": hosting, "maximum_tokens": maximum_tokens, "temperature": temperature, "top_k": top_k, @@ -276,6 +275,9 @@ def complete( "disable_optimizations": disable_optimizations, } + if hosting is not None: + payload["hosting"] = hosting + response = self.post_request( self.host + "complete", headers=self.request_headers, @@ -295,7 +297,7 @@ def embed( prompt: Union[str, Sequence[Union[str, ImagePrompt]]], pooling: List[str], layers: List[int], - hosting: str = "cloud", + hosting: Optional[str] = None, tokens: Optional[bool] = False, type: Optional[str] = None, ): @@ -323,8 +325,8 @@ def embed( * last_token: just use the last token * abs_max: aggregate token embeddings across the sequence dimension using a maximum of absolute values - hosting (str, optional, default "cloud"): - Specifies where the computation will take place. This defaults to "cloud", meaning that it can be + hosting (str, optional, default None): + Specifies where the computation will take place. This defaults to None, meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. Check available_models() for available hostings. @@ -346,12 +348,15 @@ def embed( payload = { "model": model, "prompt": serializable_prompt, - "hosting": hosting, "layers": layers, "tokens": tokens, "pooling": pooling, "type": type, } + + if hosting is not None: + payload["hosting"] = hosting + response = self.post_request( self.host + "embed", headers=self.request_headers, json=payload ) @@ -361,7 +366,7 @@ def semantic_embed( self, model: str, request: SemanticEmbeddingRequest, - hosting: str = "cloud", + hosting: Optional[str] = None, ): """ Embeds a text and returns vectors that can be used for downstream tasks (e.g. semantic similarity) and models (e.g. classifiers). @@ -371,7 +376,7 @@ def semantic_embed( Name of model to use. 
A model name refers to a model architecture (number of parameters among others). Always the latest version of model is used. The model output contains information as to the model version. hosting (str, required): - Specifies where the computation will take place. This defaults to "cloud", meaning that it can be + Specifies where the computation will take place. This defaults to None, meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. Check available_models() for available hostings. @@ -385,11 +390,14 @@ def semantic_embed( payload: Dict[str, Any] = { "model": model, - "hosting": hosting, "prompt": serializable_prompt, "representation": request.representation.value, "compress_to_size": request.compress_to_size, } + + if hosting is not None: + payload["hosting"] = hosting + response = self.post_request( self.host + "semantic_embed", headers=self.request_headers, json=payload ) @@ -399,7 +407,7 @@ def evaluate( self, model, completion_expected, - hosting: str = "cloud", + hosting: Optional[str] = None, prompt: Union[str, List[Union[str, ImagePrompt]]] = "", ): """ @@ -412,8 +420,8 @@ def evaluate( completion_expected (str, required): The ground truth completion expected to be produced given the prompt. - hosting (str, optional, default "cloud"): - Specifies where the computation will take place. This defaults to "cloud", meaning that it can be + hosting (str, optional, default None): + Specifies where the computation will take place. This defaults to None, meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. Check available_models() for available hostings. 
@@ -426,9 +434,12 @@ def evaluate( payload = { "model": model, "prompt": serializable_prompt, - "hosting": hosting, "completion_expected": completion_expected, } + + if hosting is not None: + payload["hosting"] = hosting + response = self.post_request( self.host + "evaluate", headers=self.request_headers, json=payload ) @@ -444,7 +455,7 @@ def qa( disable_optimizations: bool = False, max_answers: int = 0, min_score: float = 0.0, - hosting: str = "cloud", + hosting: Optional[str] = None, ): """ Answers a question about a prompt. @@ -483,8 +494,8 @@ def qa( min_score (float, default 0.0): The lower limit of minimum score for every answer. - hosting (str, default "cloud"): - Specifies where the computation will take place. This defaults to "cloud", meaning that it can be + hosting (str, default None): + Specifies where the computation will take place. This defaults to None, meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. Check available_models() for available hostings. 
""" @@ -500,9 +511,11 @@ def qa( "min_score": min_score, "max_chunk_size": max_chunk_size, "disable_optimizations": disable_optimizations, - "hosting": hosting, } + if hosting is not None: + payload["hosting"] = hosting + response = self.post_request( self.host + "qa", headers=self.request_headers, diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py index a6eede5..6300cc9 100644 --- a/aleph_alpha_client/aleph_alpha_model.py +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -1,5 +1,5 @@ from collections import ChainMap -from typing import Any, Mapping, Union +from typing import Any, Mapping, Optional, Union from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.completion import CompletionRequest, CompletionResponse from aleph_alpha_client.detokenization import ( @@ -20,7 +20,7 @@ class AlephAlphaModel: def __init__( - self, client: AlephAlphaClient, model_name: str, hosting: str = "cloud" + self, client: AlephAlphaClient, model_name: str, hosting: Optional[str] = None ) -> None: self.client = client self.model_name = model_name diff --git a/tests/test_qa.py b/tests/test_qa.py index 3d98003..c8a8b7d 100644 --- a/tests/test_qa.py +++ b/tests/test_qa.py @@ -53,7 +53,6 @@ def test_qa_with_client(client: AlephAlphaClient): # when posting a QA request with explicit parameters response = client.qa( model_name, - hosting="cloud", query="Who likes pizza?", documents=[Document.from_prompt(["Andreas likes pizza."])], ) From 4389c4530265267988db6f943716e5b515fc3617 Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Wed, 10 Aug 2022 15:50:08 +0200 Subject: [PATCH 2/7] Retry on 408 errors --- aleph_alpha_client/aleph_alpha_client.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 8457212..821b8c5 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py 
@@ -5,6 +5,9 @@ import logging from requests import Response +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + import aleph_alpha_client from aleph_alpha_client.document import Document from aleph_alpha_client.embedding import SemanticEmbeddingRequest @@ -41,6 +44,16 @@ def __init__( assert token is not None or (email is not None and password is not None) self.token = token or self.get_token(email, password) + retry_strategy = Retry( + total=3, + status_forcelist=[408], + method_whitelist=["POST", "GET"], + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + self.requests_session = requests.Session() + self.requests_session.mount("https://", adapter) + self.requests_session.mount("http://", adapter) + def get_version(self): response = self.get_request(self.host + "version") response.raise_for_status() From 335df48fc6b0a4f0c0758b9c8682290f36bd2f06 Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Wed, 10 Aug 2022 15:51:36 +0200 Subject: [PATCH 3/7] Also retry on 503 errors --- aleph_alpha_client/aleph_alpha_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 821b8c5..a8d6441 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -46,7 +46,7 @@ def __init__( retry_strategy = Retry( total=3, - status_forcelist=[408], + status_forcelist=[408, 503], method_whitelist=["POST", "GET"], ) adapter = HTTPAdapter(max_retries=retry_strategy) From 9f35074294d59a21ab709d273a6a91a09af69c56 Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Wed, 10 Aug 2022 16:03:44 +0200 Subject: [PATCH 4/7] retry on more codes and with backoff --- aleph_alpha_client/aleph_alpha_client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index a8d6441..ec08b62 100644 --- 
a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -46,7 +46,8 @@ def __init__( retry_strategy = Retry( total=3, - status_forcelist=[408, 503], + backoff_factor=0.1, + status_forcelist=[408, 429, 500, 502, 503, 504], method_whitelist=["POST", "GET"], ) adapter = HTTPAdapter(max_retries=retry_strategy) From a6bde2e72f38652fdfa713d4299a0f5f7a6b2249 Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Wed, 10 Aug 2022 16:31:17 +0200 Subject: [PATCH 5/7] Fix timeout test --- aleph_alpha_client/aleph_alpha_client.py | 28 +++++++++++++----------- tests/test_errors.py | 13 ++++++----- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index ec08b62..d2545dc 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -33,6 +33,17 @@ def __init__( self.request_timeout_seconds = request_timeout_seconds + retry_strategy = Retry( + total=3, + backoff_factor=0.1, + status_forcelist=[408, 429, 500, 502, 503, 504], + allowed_methods=["POST", "GET"], + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + self.requests_session = requests.Session() + self.requests_session.mount("https://", adapter) + self.requests_session.mount("http://", adapter) + # check server version expect_release = "1" version = self.get_version() @@ -44,17 +55,6 @@ def __init__( assert token is not None or (email is not None and password is not None) self.token = token or self.get_token(email, password) - retry_strategy = Retry( - total=3, - backoff_factor=0.1, - status_forcelist=[408, 429, 500, 502, 503, 504], - method_whitelist=["POST", "GET"], - ) - adapter = HTTPAdapter(max_retries=retry_strategy) - self.requests_session = requests.Session() - self.requests_session.mount("https://", adapter) - self.requests_session.mount("http://", adapter) - def get_version(self): response = self.get_request(self.host + "version") 
response.raise_for_status() @@ -71,10 +71,12 @@ def get_token(self, email, password): raise ValueError("cannot get token") def get_request(self, url, headers=None): - return requests.get(url, headers=headers, timeout=self.request_timeout_seconds) + return self.requests_session.get( + url, headers=headers, timeout=self.request_timeout_seconds + ) def post_request(self, url, json, headers=None): - return requests.post( + return self.requests_session.post( url, headers=headers, json=json, timeout=self.request_timeout_seconds ) diff --git a/tests/test_errors.py b/tests/test_errors.py index 31671df..82c6236 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -389,11 +389,14 @@ def httpserver_listen_address(): def test_timeout(httpserver): - httpserver.expect_request("/version").respond_with_handler( - lambda request: time.sleep(2) - ) + def handler(foo): + time.sleep(2) + + httpserver.expect_request("/version").respond_with_handler(handler) + """Ensures Timeouts works. AlephAlphaClient constructor calls version endpoint.""" - with pytest.raises(requests.exceptions.Timeout): + with pytest.raises(requests.exceptions.ConnectionError): AlephAlphaClient( - host="http://localhost:8000/", token="AA_TOKEN", request_timeout_seconds=1 + host="http://localhost:8000/", token="AA_TOKEN", request_timeout_seconds=0.1 ) + From ee2e529152fd5436e4d974cd3798e59ca474cf89 Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Wed, 10 Aug 2022 16:35:29 +0200 Subject: [PATCH 6/7] update version and changelog --- Changelog.md | 6 ++++++ aleph_alpha_client/version.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index 6952f03..77283a2 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,11 @@ # Changelog +## 2.2.0 + +### New feature + +* Retry failed HTTP requests via urllib3 for status codes 408, 429, 500, 502, 503, 504 + ## 2.1.0 ### New feature diff --git a/aleph_alpha_client/version.py b/aleph_alpha_client/version.py index 
9aa3f90..8a124bf 100644 --- a/aleph_alpha_client/version.py +++ b/aleph_alpha_client/version.py @@ -1 +1 @@ -__version__ = "2.1.0" +__version__ = "2.2.0" From 7008dfea46955df3bb9d055a10bbfa0fe2dacaf8 Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Wed, 10 Aug 2022 17:08:24 +0200 Subject: [PATCH 7/7] add 2 more tests for 503 and 408 retries --- tests/test_errors.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/test_errors.py b/tests/test_errors.py index 82c6236..2dc34bf 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -388,7 +388,6 @@ def httpserver_listen_address(): def test_timeout(httpserver): - def handler(foo): time.sleep(2) @@ -400,3 +399,18 @@ def handler(foo): host="http://localhost:8000/", token="AA_TOKEN", request_timeout_seconds=0.1 ) + +def test_retry_on_503(httpserver): + httpserver.expect_request("/version").respond_with_data("busy", status=503) + + """Ensures retries on 503 are exhausted and raise RetryError. AlephAlphaClient constructor calls the version endpoint.""" + with pytest.raises(requests.exceptions.RetryError): + AlephAlphaClient(host="http://localhost:8000/", token="AA_TOKEN") + + +def test_retry_on_408(httpserver): + httpserver.expect_request("/version").respond_with_data("busy", status=408) + + """Ensures retries on 408 are exhausted and raise RetryError. AlephAlphaClient constructor calls the version endpoint.""" + with pytest.raises(requests.exceptions.RetryError): + AlephAlphaClient(host="http://localhost:8000/", token="AA_TOKEN")