From c72d8182185a63eb512819601d8eb4334e5fc7d6 Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 14:50:56 +0200 Subject: [PATCH 01/10] Add Prompt type --- aleph_alpha_client/prompt.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/aleph_alpha_client/prompt.py b/aleph_alpha_client/prompt.py index 0e8f6b1..ed4c8c6 100644 --- a/aleph_alpha_client/prompt.py +++ b/aleph_alpha_client/prompt.py @@ -1,8 +1,15 @@ -from typing import Dict, List, Union +from typing import Dict, List, NamedTuple, Union from aleph_alpha_client.image import ImagePrompt +class Prompt(NamedTuple): + items: List[Union[str, ImagePrompt]] + + @staticmethod + def from_text(text: str) -> "Prompt": + return Prompt([text]) + def _to_prompt_item(item: Union[str, ImagePrompt]) -> Dict[str, str]: if isinstance(item, str): return {"type": "text", "data": item} From 270b6691a85841ee775b5b73c1ba2de058a692e5 Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 15:03:57 +0200 Subject: [PATCH 02/10] Rename fixture model -> model_name Prepares adding a new fixture model returning a model-instance --- tests/common.py | 2 +- tests/test_complete.py | 16 ++++++++-------- tests/test_detokenize.py | 16 ++++++++-------- tests/test_embed.py | 24 ++++++++++++------------ tests/test_evaluate.py | 16 ++++++++-------- tests/test_explanation.py | 12 ++++++------ tests/test_qa.py | 26 +++++++++++++------------- tests/test_tokenize.py | 16 ++++++++-------- 8 files changed, 64 insertions(+), 64 deletions(-) diff --git a/tests/common.py b/tests/common.py index 963dc2d..aa9f6ee 100644 --- a/tests/common.py +++ b/tests/common.py @@ -41,7 +41,7 @@ def client() -> Iterable[AlephAlphaClient]: @pytest.fixture(scope="session") -def model() -> str: +def model_name() -> str: config = dotenv_values(".env") model = config.get("TEST_MODEL") if model is None: diff --git a/tests/test_complete.py b/tests/test_complete.py index de965b6..8bce92b 100644 --- a/tests/test_complete.py +++ b/tests/test_complete.py @@ -4,10 +4,10 @@ from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.completion import CompletionRequest -from tests.common import client, model +from tests.common import client, model_name -def test_complete(client: AlephAlphaClient, model: str): +def test_complete(client: AlephAlphaClient, model_name: str): request = CompletionRequest( prompt="", maximum_tokens=7, @@ -17,7 +17,7 @@ def test_complete(client: AlephAlphaClient, model: str): ) response = client.complete( - model, + model_name, hosting="cloud", request=request, ) @@ -26,18 +26,18 @@ def test_complete(client: AlephAlphaClient, model: str): assert response.model_version is not None -def test_complete_with_explicit_parameters(client: AlephAlphaClient, model: str): +def test_complete_with_explicit_parameters(client: AlephAlphaClient, model_name: str): response = client.complete( - model, prompt=[""], maximum_tokens=7, tokens=False, log_probs=0 + model_name, prompt=[""], maximum_tokens=7, tokens=False, log_probs=0 ) assert len(response["completions"]) == 1 assert response["model_version"] is not None -def test_complete_fails(client: AlephAlphaClient, model: str): +def test_complete_fails(client: AlephAlphaClient, model_name: str): # given a client - assert model in (model["name"] for model in client.available_models()) + assert model_name in (model["name"] for model in client.available_models()) # when posting an illegal request request = CompletionRequest( @@ -49,6 +49,6 @@ def test_complete_fails(client: 
AlephAlphaClient, model: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - response = client.complete(model, hosting="cloud", request=request) + response = client.complete(model_name, hosting="cloud", request=request) assert e.value.args[0] == 400 diff --git a/tests/test_detokenize.py b/tests/test_detokenize.py index c33725e..d51555e 100644 --- a/tests/test_detokenize.py +++ b/tests/test_detokenize.py @@ -2,24 +2,24 @@ from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.detokenization import DetokenizationRequest -from tests.common import client, model +from tests.common import client, model_name -def test_detokenize(client: AlephAlphaClient, model: str): - response = client.detokenize(model, request=DetokenizationRequest([4711])) +def test_detokenize(client: AlephAlphaClient, model_name: str): + response = client.detokenize(model_name, request=DetokenizationRequest([4711])) assert response.result is not None -def test_detokenize_with_explicit_parameters(client: AlephAlphaClient, model: str): - response = client.detokenize(model, token_ids=[4711, 42]) +def test_detokenize_with_explicit_parameters(client: AlephAlphaClient, model_name: str): + response = client.detokenize(model_name, token_ids=[4711, 42]) assert response["result"] is not None -def test_detokenize_fails(client: AlephAlphaClient, model: str): +def test_detokenize_fails(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting an illegal request request = DetokenizationRequest([]) @@ -27,7 +27,7 @@ def test_detokenize_fails(client: AlephAlphaClient, model: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: response = client.detokenize( - model, + model_name, request=request, ) diff --git a/tests/test_embed.py b/tests/test_embed.py index 8aee0aa..62a653e 100644 --- a/tests/test_embed.py +++ b/tests/test_embed.py @@ -1,28 +1,28 @@ from typing import List import pytest from aleph_alpha_client import AlephAlphaClient, EmbeddingRequest -from tests.common import client, model +from tests.common import client, model_name -def test_embed(client: AlephAlphaClient, model: str): +def test_embed(client: AlephAlphaClient, model_name: str): request = EmbeddingRequest( prompt=["hello"], layers=[0, -1], pooling=["mean", "max"] ) - result = client.embed(model=model, request=request) + result = client.embed(model=model_name, request=request) assert result.model_version is not None assert len(result.embeddings) == len(request.pooling) * len(request.layers) assert result.tokens is None -def test_embed_with_explicit_parameters(client: AlephAlphaClient, model: str): +def test_embed_with_explicit_parameters(client: AlephAlphaClient, model_name: str): layers = [0, -1] pooling = ["mean", "max"] prompt = ["hello"] - result = client.embed(model, prompt, pooling, layers) + result = client.embed(model_name, prompt, pooling, layers) assert result["model_version"] is not None assert len(result["embeddings"]) == len(layers) @@ -31,7 +31,7 @@ def test_embed_with_explicit_parameters(client: AlephAlphaClient, model: str): def test_embedding_of_one_token_aggregates_identically( - client: AlephAlphaClient, model: str + client: AlephAlphaClient, model_name: str ): request = EmbeddingRequest( prompt=[ @@ -41,35 +41,35 @@ 
def test_embedding_of_one_token_aggregates_identically( pooling=["mean", "max"], ) - result = client.embed(model=model, request=request) + result = client.embed(model=model_name, request=request) assert ( result.embeddings[("layer_0", "mean")] == result.embeddings[("layer_0", "max")] ) -def test_embed_with_tokens(client: AlephAlphaClient, model: str): +def test_embed_with_tokens(client: AlephAlphaClient, model_name: str): request = EmbeddingRequest( prompt=["abc"], layers=[-1], pooling=["mean"], tokens=True ) - result = client.embed(model=model, request=request) + result = client.embed(model=model_name, request=request) assert result.model_version is not None assert len(result.embeddings) == len(request.pooling) * len(request.layers) assert result.tokens is not None -def test_failing_embedding_request(client: AlephAlphaClient, model: str): +def test_failing_embedding_request(client: AlephAlphaClient, model_name: str): # given a client - assert model in (model["name"] for model in client.available_models()) + assert model_name in (model["name"] for model in client.available_models()) # when posting an illegal request request = EmbeddingRequest(prompt=["abc"], layers=[0, 1, 2], pooling=["mean"]) # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - client.embed(model=model, request=request) + client.embed(model=model_name, request=request) assert e.value.args[0] == 400 diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index 3e3bf06..d878bcf 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -3,29 +3,29 @@ import pytest from aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.evaluation import EvaluationRequest -from tests.common import client, model +from tests.common import client, model_name -def test_evaluate(client: AlephAlphaClient, model: str): +def test_evaluate(client: AlephAlphaClient, model_name: str): request = EvaluationRequest(prompt=["hello"], completion_expected="world") - result = client.evaluate(model=model, request=request) + result = client.evaluate(model=model_name, request=request) assert result.model_version is not None assert result.result is not None -def test_evaluate_with_explicit_parameters(client: AlephAlphaClient, model: str): - result = client.evaluate(model, prompt="hello", completion_expected="world") +def test_evaluate_with_explicit_parameters(client: AlephAlphaClient, model_name: str): + result = client.evaluate(model_name, prompt="hello", completion_expected="world") assert result["model_version"] is not None assert result["result"] is not None -def test_evaluate_fails(client: AlephAlphaClient, model: str): +def test_evaluate_fails(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting an illegal request request = EvaluationRequest( @@ -36,7 +36,7 @@ def test_evaluate_fails(client: AlephAlphaClient, model: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: response = client.evaluate( - model, + model_name, hosting="cloud", request=request, ) diff --git a/tests/test_explanation.py b/tests/test_explanation.py index 0dd15fc..9553900 100644 --- a/tests/test_explanation.py +++ b/tests/test_explanation.py @@ -1,10 +1,10 @@ import pytest from aleph_alpha_client import AlephAlphaClient, ExplanationRequest -from tests.common 
import client, model +from tests.common import client, model_name -def test_explanation(client: AlephAlphaClient, model: str): +def test_explanation(client: AlephAlphaClient, model_name: str): request = ExplanationRequest( prompt=["An apple a day"], @@ -13,15 +13,15 @@ def test_explanation(client: AlephAlphaClient, model: str): suppression_factor=0.1, ) - explanation = client._explain(model=model, request=request, hosting=None) + explanation = client._explain(model=model_name, request=request, hosting=None) # List is true if not None and not empty assert explanation["result"] -def test_explain_fails(client: AlephAlphaClient, model: str): +def test_explain_fails(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting an illegal request request = ExplanationRequest( @@ -35,7 +35,7 @@ def test_explain_fails(client: AlephAlphaClient, model: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: response = client._explain( - model, + model_name, hosting="cloud", request=request, ) diff --git a/tests/test_qa.py b/tests/test_qa.py index 9f7db60..d377445 100644 --- a/tests/test_qa.py +++ b/tests/test_qa.py @@ -3,12 +3,12 @@ from aleph_alpha_client.document import Document from aleph_alpha_client.qa import QaRequest -from tests.common import client, model +from tests.common import client, model_name -def test_qa(client: AlephAlphaClient, model: str): +def test_qa(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting a QA request with a QaRequest object request = QaRequest( @@ -17,7 +17,7 @@ def test_qa(client: AlephAlphaClient, model: str): ) response = client.qa( - model, + model_name, hosting="cloud", request=request, ) @@ -27,9 +27,9 @@ def test_qa(client: AlephAlphaClient, model: str): assert response.model_version is not None -def test_qa_no_answer_found(client: AlephAlphaClient, model: str): +def test_qa_no_answer_found(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting a QA request with a QaRequest object request = QaRequest( @@ -38,7 +38,7 @@ def test_qa_no_answer_found(client: AlephAlphaClient, model: str): ) response = client.qa( - model, + model_name, hosting="cloud", request=request, ) @@ -48,13 +48,13 @@ def test_qa_no_answer_found(client: AlephAlphaClient, model: str): assert response.model_version is not None -def test_qa_with_explicit_parameters(client: AlephAlphaClient, model: str): +def test_qa_with_explicit_parameters(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting a QA request with explicit parameters response = client.qa( - model, + model_name, hosting="cloud", query="Who likes pizza?", documents=[Document.from_prompt(["Andreas likes pizza."])], @@ -65,9 +65,9 @@ def test_qa_with_explicit_parameters(client: AlephAlphaClient, model: str): assert response["model_version"] is not None -def 
test_qa_fails(client: AlephAlphaClient, model: str): +def test_qa_fails(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting an illegal request request = QaRequest( @@ -78,7 +78,7 @@ def test_qa_fails(client: AlephAlphaClient, model: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: response = client.qa( - model, + model_name, hosting="cloud", request=request, ) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index a81a6b3..c932ada 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -2,26 +2,26 @@ from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.tokenization import TokenizationRequest -from tests.common import client, model +from tests.common import client, model_name -def test_tokenize(client: AlephAlphaClient, model: str): - response = client.tokenize(model, request=TokenizationRequest("Hello", True, True)) +def test_tokenize(client: AlephAlphaClient, model_name: str): + response = client.tokenize(model_name, request=TokenizationRequest("Hello", True, True)) assert len(response.tokens) == 1 assert len(response.token_ids) == 1 -def test_tokenize_with_explicit_parameters(client: AlephAlphaClient, model: str): - response = client.tokenize(model, prompt="Hello", tokens=True, token_ids=True) +def test_tokenize_with_explicit_parameters(client: AlephAlphaClient, model_name: str): + response = client.tokenize(model_name, prompt="Hello", tokens=True, token_ids=True) assert len(response["tokens"]) == 1 assert len(response["token_ids"]) == 1 -def test_tokenize_fails(client: AlephAlphaClient, model: str): +def test_tokenize_fails(client: AlephAlphaClient, model_name: str): # given a client - assert model in map(lambda model: model["name"], client.available_models()) + assert model_name in map(lambda model: model["name"], client.available_models()) # when posting an illegal request request = TokenizationRequest("hello", False, False) @@ -29,7 +29,7 @@ def test_tokenize_fails(client: AlephAlphaClient, model: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: response = client.tokenize( - model, + model_name, request=request, ) From e1d3e3ecb5cbc8f93142acebee513b4bfb0daf86 Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 15:32:00 +0200 Subject: [PATCH 03/10] Use AlephAlphaModel for completion request --- aleph_alpha_client/__init__.py | 1 + aleph_alpha_client/aleph_alpha_client.py | 18 ++---------- aleph_alpha_client/aleph_alpha_model.py | 14 +++++++++ readme.ipynb | 36 ++++++++++++++++-------- tests/common.py | 7 +++++ tests/test_complete.py | 19 ++++++------- 6 files changed, 57 insertions(+), 38 deletions(-) create mode 100644 aleph_alpha_client/aleph_alpha_model.py diff --git a/aleph_alpha_client/__init__.py b/aleph_alpha_client/__init__.py index 77d2ade..3c863e9 100644 --- a/aleph_alpha_client/__init__.py +++ b/aleph_alpha_client/__init__.py @@ -1,4 +1,5 @@ from .aleph_alpha_client import AlephAlphaClient, QuotaError, POOLING_OPTIONS +from .aleph_alpha_model import AlephAlphaModel from .image import ImagePrompt from .explanation import ExplanationRequest from .embedding import EmbeddingRequest diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 
2239ea7..46fa32f 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -151,7 +151,6 @@ def complete( stop_sequences: Optional[List[str]] = None, tokens: bool = False, disable_optimizations: bool = False, - request: CompletionRequest = None, ): """ Generates samples from a prompt. @@ -226,18 +225,9 @@ def complete( We continually research optimal ways to work with our models. By default, we apply these optimizations to both your prompt and completion for you. Our goal is to improve your results while using our API. But you can always pass disable_optimizations: true and we will leave your prompt and completion untouched. - - request - A CompletionRequest wrapping the above-mentioned parameters except model and hosting """ - if request is None: - logging.warning( - "Calling this method with individual request parameters is deprecated. " - + "Please pass a CompletionRequest object as the request parameter instead." - ) - - named_request = request or CompletionRequest( + named_request = CompletionRequest( prompt=prompt or [""], maximum_tokens=maximum_tokens, temperature=temperature, @@ -268,11 +258,7 @@ def complete( print( 'We optimized your prompt before sending it to the model. The optimized prompt is available at result["optimized_prompt"]. If you do not want these optimizations applied, you can pass the disable_optimizations flag to your request.' ) - return ( - response_json - if request is None - else CompletionResponse.from_json(response_json) - ) + return response_json def embed( self, diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py new file mode 100644 index 0000000..1818922 --- /dev/null +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -0,0 +1,14 @@ +from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient +from aleph_alpha_client.completion import CompletionRequest, CompletionResponse + + +class AlephAlphaModel: + + def __init__(self, client: AlephAlphaClient, model_name: str, hosting: str = "cloud") -> None: + self.client = client + self.model_name = model_name + self.hosting = hosting + + def complete(self, request: CompletionRequest) -> CompletionResponse: + response_json = self.client.complete(model = self.model_name, hosting=self.hosting, **request._asdict()) + return CompletionResponse.from_json(response_json) \ No newline at end of file diff --git a/readme.ipynb b/readme.ipynb index 4558469..0b4809c 100644 --- a/readme.ipynb +++ b/readme.ipynb @@ -27,20 +27,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "PermissionError", + "evalue": "[Errno 401] {'error': 'InvalidSignature', 'code': 'UNAUTHENTICATED'}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/volker/workspace/aleph-open/aleph-alpha-client/readme.ipynb Cell 2'\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m prompt \u001b[39m=\u001b[39m [\n\u001b[1;32m 14\u001b[0m image,\n\u001b[1;32m 15\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mQ: What does the picture show? 
A:\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 16\u001b[0m ]\n\u001b[1;32m 17\u001b[0m request \u001b[39m=\u001b[39m CompletionRequest(prompt\u001b[39m=\u001b[39mprompt, maximum_tokens\u001b[39m=\u001b[39m\u001b[39m20\u001b[39m)\n\u001b[0;32m---> 18\u001b[0m result \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mcomplete(request)\n\u001b[1;32m 20\u001b[0m \u001b[39mprint\u001b[39m(result\u001b[39m.\u001b[39mcompletions[\u001b[39m0\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mcompletion\u001b[39m\u001b[39m\"\u001b[39m])\n", + "File \u001b[0;32m~/workspace/aleph-open/aleph-alpha-client/aleph_alpha_client/aleph_alpha_model.py:13\u001b[0m, in \u001b[0;36mAlephAlphaModel.complete\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcomplete\u001b[39m(\u001b[39mself\u001b[39m, request: CompletionRequest) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompletionResponse:\n\u001b[0;32m---> 13\u001b[0m response_json \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mclient\u001b[39m.\u001b[39;49mcomplete(model \u001b[39m=\u001b[39;49m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel_name, hosting\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mhosting, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mrequest\u001b[39m.\u001b[39;49m_asdict())\n\u001b[1;32m 14\u001b[0m \u001b[39mreturn\u001b[39;00m CompletionResponse\u001b[39m.\u001b[39mfrom_json(response_json)\n", + "File \u001b[0;32m~/workspace/aleph-open/aleph-alpha-client/aleph_alpha_client/aleph_alpha_client.py:255\u001b[0m, in \u001b[0;36mAlephAlphaClient.complete\u001b[0;34m(self, model, prompt, hosting, maximum_tokens, temperature, top_k, top_p, presence_penalty, frequency_penalty, repetition_penalties_include_prompt, use_multiplicative_presence_penalty, best_of, n, logit_bias, log_probs, stop_sequences, tokens, disable_optimizations)\u001b[0m\n\u001b[1;32m 230\u001b[0m named_request \u001b[39m=\u001b[39m CompletionRequest(\n\u001b[1;32m 231\u001b[0m prompt\u001b[39m=\u001b[39mprompt \u001b[39mor\u001b[39;00m [\u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m],\n\u001b[1;32m 232\u001b[0m maximum_tokens\u001b[39m=\u001b[39mmaximum_tokens,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 246\u001b[0m disable_optimizations\u001b[39m=\u001b[39mdisable_optimizations,\n\u001b[1;32m 247\u001b[0m )\n\u001b[1;32m 249\u001b[0m response \u001b[39m=\u001b[39m requests\u001b[39m.\u001b[39mpost(\n\u001b[1;32m 250\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhost \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mcomplete\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 251\u001b[0m headers\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequest_headers,\n\u001b[1;32m 252\u001b[0m json\u001b[39m=\u001b[39mnamed_request\u001b[39m.\u001b[39mrender_as_body(model, hosting),\n\u001b[1;32m 253\u001b[0m timeout\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 254\u001b[0m )\n\u001b[0;32m--> 255\u001b[0m response_json \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_translate_errors(response)\n\u001b[1;32m 256\u001b[0m \u001b[39mif\u001b[39;00m response_json\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39moptimized_prompt\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 257\u001b[0m \u001b[39m# Return a message to the user that we optimized their prompt\u001b[39;00m\n\u001b[1;32m 258\u001b[0m 
\u001b[39mprint\u001b[39m(\n\u001b[1;32m 259\u001b[0m \u001b[39m'\u001b[39m\u001b[39mWe optimized your prompt before sending it to the model. The optimized prompt is available at result[\u001b[39m\u001b[39m\"\u001b[39m\u001b[39moptimized_prompt\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m]. If you do not want these optimizations applied, you can pass the disable_optimizations flag to your request.\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 260\u001b[0m )\n", + "File \u001b[0;32m~/workspace/aleph-open/aleph-alpha-client/aleph_alpha_client/aleph_alpha_client.py:460\u001b[0m, in \u001b[0;36mAlephAlphaClient._translate_errors\u001b[0;34m(response)\u001b[0m\n\u001b[1;32m 458\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(response\u001b[39m.\u001b[39mstatus_code, response\u001b[39m.\u001b[39mjson())\n\u001b[1;32m 459\u001b[0m \u001b[39melif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m:\n\u001b[0;32m--> 460\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mPermissionError\u001b[39;00m(response\u001b[39m.\u001b[39mstatus_code, response\u001b[39m.\u001b[39mjson())\n\u001b[1;32m 461\u001b[0m \u001b[39melif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m402\u001b[39m:\n\u001b[1;32m 462\u001b[0m \u001b[39mraise\u001b[39;00m QuotaError(response\u001b[39m.\u001b[39mstatus_code, response\u001b[39m.\u001b[39mjson())\n", + "\u001b[0;31mPermissionError\u001b[0m: [Errno 401] {'error': 'InvalidSignature', 'code': 'UNAUTHENTICATED'}" + ] + } + ], "source": [ - "from aleph_alpha_client import ImagePrompt, AlephAlphaClient, CompletionRequest\n", + "from aleph_alpha_client import ImagePrompt, AlephAlphaModel, AlephAlphaClient, CompletionRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", "# You need to choose a model with multimodal capabilities for this example.\n", - "model = \"luminous-base\"\n", "url = \"https://cdn-images-1.medium.com/max/1200/1*HunNdlTmoPj8EKpl-jqvBA.png\"\n", "\n", "image = ImagePrompt.from_url(url)\n", @@ -49,7 +63,7 @@ " \"Q: What does the picture show? 
A:\",\n", "]\n", "request = CompletionRequest(prompt=prompt, maximum_tokens=20)\n", - "result = client.complete(model, request=request)\n", + "result = model.complete(request)\n", "\n", "print(result.completions[0][\"completion\"])" ] @@ -429,7 +443,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.10 ('venv': venv)", + "display_name": "Python 3.8.13 64-bit ('3.8-aleph-alpha-client')", "language": "python", "name": "python3" }, @@ -443,12 +457,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "1285231c71741bfec547062555f68dae52df95376b2ffb34687075df3ca42714" + "hash": "d222ed40ed0240cca423d298d0a37d8e7bd0dfb1af9103b378684293dbd36ad9" } } }, diff --git a/tests/common.py b/tests/common.py index aa9f6ee..340089a 100644 --- a/tests/common.py +++ b/tests/common.py @@ -3,7 +3,14 @@ from aleph_alpha_client import AlephAlphaClient from dotenv import dotenv_values +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel + +@pytest.fixture(scope="session") +def model(client: AlephAlphaClient, model_name: str) -> AlephAlphaModel: + return AlephAlphaModel(client, model_name) + + @pytest.fixture(scope="session") def client() -> Iterable[AlephAlphaClient]: config = dotenv_values(".env") diff --git a/tests/test_complete.py b/tests/test_complete.py index 8bce92b..cc5d1f3 100644 --- a/tests/test_complete.py +++ b/tests/test_complete.py @@ -2,12 +2,13 @@ from multiprocessing.sharedctypes import Value import pytest from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel from aleph_alpha_client.completion import CompletionRequest -from tests.common import client, model_name +from tests.common import client, model_name, model -def test_complete(client: AlephAlphaClient, model_name: str): +def test_complete(model: AlephAlphaModel): request = CompletionRequest( prompt="", maximum_tokens=7, @@ -16,17 +17,13 @@ def test_complete(client: AlephAlphaClient, model_name: str): logit_bias={1: 2.0}, ) - response = client.complete( - model_name, - hosting="cloud", - request=request, - ) + response = model.complete(request) assert len(response.completions) == 1 assert response.model_version is not None -def test_complete_with_explicit_parameters(client: AlephAlphaClient, model_name: str): +def test_complete_with_client(client: AlephAlphaClient, model_name: str): response = client.complete( model_name, prompt=[""], maximum_tokens=7, tokens=False, log_probs=0 ) @@ -35,9 +32,9 @@ def test_complete_with_explicit_parameters(client: AlephAlphaClient, model_name: assert response["model_version"] is not None -def test_complete_fails(client: AlephAlphaClient, model_name: str): +def test_complete_fails(model: AlephAlphaModel): # given a client - assert model_name in (model["name"] for model in client.available_models()) + assert model.model_name in (model["name"] for model in model.client.available_models()) # when posting an illegal request request = CompletionRequest( @@ -49,6 +46,6 @@ def test_complete_fails(client: AlephAlphaClient, model_name: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - response = client.complete(model_name, hosting="cloud", request=request) + response = model.complete(request) assert e.value.args[0] == 400 From 35e3027393705865e0be5eb173619aab379b1cb8 Mon Sep 17 00:00:00 2001 From: Volker Stampa 
Date: Mon, 27 Jun 2022 15:48:05 +0200 Subject: [PATCH 04/10] Add tokenize to model --- README.md | 22 ++++++++--------- aleph_alpha_client/aleph_alpha_client.py | 10 +------- aleph_alpha_client/aleph_alpha_model.py | 7 +++++- readme.ipynb | 30 ++++++------------------ tests/test_tokenize.py | 18 +++++++------- 5 files changed, 32 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 144ea85..7dcdc79 100644 --- a/README.md +++ b/README.md @@ -21,16 +21,15 @@ pip install aleph-alpha-client ```python -from aleph_alpha_client import ImagePrompt, AlephAlphaClient, CompletionRequest +from aleph_alpha_client import ImagePrompt, AlephAlphaModel, AlephAlphaClient, CompletionRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + model_name = "luminous-extended" ) # You need to choose a model with multimodal capabilities for this example. -model = "luminous-base" url = "https://cdn-images-1.medium.com/max/1200/1*HunNdlTmoPj8EKpl-jqvBA.png" image = ImagePrompt.from_url(url) @@ -39,7 +38,7 @@ prompt = [ "Q: What does the picture show? A:", ] request = CompletionRequest(prompt=prompt, maximum_tokens=20) -result = client.complete(model, request=request) +result = model.complete(request) print(result.completions[0]["completion"]) ``` @@ -240,18 +239,17 @@ print(result) ```python -from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, TokenizationRequest +from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, TokenizationRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + model_name = "luminous-extended" ) # You need to choose a model with qa support and multimodal capabilities for this example. -model = "luminous-extended" request = TokenizationRequest(prompt="This is an example.", tokens=True, token_ids=True) -response = client.tokenize(model, request=request) +response = model.tokenize(request) print(response) ``` diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 46fa32f..ee1f73e 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -85,11 +85,6 @@ def tokenize( """ Tokenizes the given prompt for the given model. """ - if request is None: - logging.warning( - "Calling this method with individual request parameters is deprecated. " - + "Please pass a TokenizationRequest object as the request parameter instead." 
- ) named_request = request or TokenizationRequest(prompt or "", tokens, token_ids) response = requests.post( @@ -97,10 +92,7 @@ def tokenize( headers=self.request_headers, json=named_request.render_as_body(model), ) - response_dict = self._translate_errors(response) - return ( - TokenizationResponse.from_json(response_dict) if request else response_dict - ) + return self._translate_errors(response) def detokenize( self, diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py index 1818922..f0b7462 100644 --- a/aleph_alpha_client/aleph_alpha_model.py +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -1,5 +1,6 @@ from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.completion import CompletionRequest, CompletionResponse +from aleph_alpha_client.tokenization import TokenizationRequest, TokenizationResponse class AlephAlphaModel: @@ -11,4 +12,8 @@ def __init__(self, client: AlephAlphaClient, model_name: str, hosting: str = "cl def complete(self, request: CompletionRequest) -> CompletionResponse: response_json = self.client.complete(model = self.model_name, hosting=self.hosting, **request._asdict()) - return CompletionResponse.from_json(response_json) \ No newline at end of file + return CompletionResponse.from_json(response_json) + + def tokenize(self, request: TokenizationRequest) -> TokenizationResponse: + response_json = self.client.tokenize(model = self.model_name, **request._asdict()) + return TokenizationResponse.from_json(response_json) \ No newline at end of file diff --git a/readme.ipynb b/readme.ipynb index 0b4809c..11f1d61 100644 --- a/readme.ipynb +++ b/readme.ipynb @@ -27,24 +27,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "PermissionError", - "evalue": "[Errno 401] {'error': 'InvalidSignature', 'code': 'UNAUTHENTICATED'}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mPermissionError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/volker/workspace/aleph-open/aleph-alpha-client/readme.ipynb Cell 2'\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 13\u001b[0m prompt \u001b[39m=\u001b[39m [\n\u001b[1;32m 14\u001b[0m image,\n\u001b[1;32m 15\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mQ: What does the picture show? 
A:\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 16\u001b[0m ]\n\u001b[1;32m 17\u001b[0m request \u001b[39m=\u001b[39m CompletionRequest(prompt\u001b[39m=\u001b[39mprompt, maximum_tokens\u001b[39m=\u001b[39m\u001b[39m20\u001b[39m)\n\u001b[0;32m---> 18\u001b[0m result \u001b[39m=\u001b[39m model\u001b[39m.\u001b[39;49mcomplete(request)\n\u001b[1;32m 20\u001b[0m \u001b[39mprint\u001b[39m(result\u001b[39m.\u001b[39mcompletions[\u001b[39m0\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mcompletion\u001b[39m\u001b[39m\"\u001b[39m])\n", - "File \u001b[0;32m~/workspace/aleph-open/aleph-alpha-client/aleph_alpha_client/aleph_alpha_model.py:13\u001b[0m, in \u001b[0;36mAlephAlphaModel.complete\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcomplete\u001b[39m(\u001b[39mself\u001b[39m, request: CompletionRequest) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompletionResponse:\n\u001b[0;32m---> 13\u001b[0m response_json \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mclient\u001b[39m.\u001b[39;49mcomplete(model \u001b[39m=\u001b[39;49m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel_name, hosting\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mhosting, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mrequest\u001b[39m.\u001b[39;49m_asdict())\n\u001b[1;32m 14\u001b[0m \u001b[39mreturn\u001b[39;00m CompletionResponse\u001b[39m.\u001b[39mfrom_json(response_json)\n", - "File \u001b[0;32m~/workspace/aleph-open/aleph-alpha-client/aleph_alpha_client/aleph_alpha_client.py:255\u001b[0m, in \u001b[0;36mAlephAlphaClient.complete\u001b[0;34m(self, model, prompt, hosting, maximum_tokens, temperature, top_k, top_p, presence_penalty, frequency_penalty, repetition_penalties_include_prompt, use_multiplicative_presence_penalty, best_of, n, logit_bias, log_probs, stop_sequences, tokens, disable_optimizations)\u001b[0m\n\u001b[1;32m 230\u001b[0m named_request \u001b[39m=\u001b[39m CompletionRequest(\n\u001b[1;32m 231\u001b[0m prompt\u001b[39m=\u001b[39mprompt \u001b[39mor\u001b[39;00m [\u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m],\n\u001b[1;32m 232\u001b[0m maximum_tokens\u001b[39m=\u001b[39mmaximum_tokens,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 246\u001b[0m disable_optimizations\u001b[39m=\u001b[39mdisable_optimizations,\n\u001b[1;32m 247\u001b[0m )\n\u001b[1;32m 249\u001b[0m response \u001b[39m=\u001b[39m requests\u001b[39m.\u001b[39mpost(\n\u001b[1;32m 250\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhost \u001b[39m+\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mcomplete\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 251\u001b[0m headers\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequest_headers,\n\u001b[1;32m 252\u001b[0m json\u001b[39m=\u001b[39mnamed_request\u001b[39m.\u001b[39mrender_as_body(model, hosting),\n\u001b[1;32m 253\u001b[0m timeout\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 254\u001b[0m )\n\u001b[0;32m--> 255\u001b[0m response_json \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_translate_errors(response)\n\u001b[1;32m 256\u001b[0m \u001b[39mif\u001b[39;00m response_json\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39moptimized_prompt\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 257\u001b[0m \u001b[39m# Return a message to the user that we optimized their prompt\u001b[39;00m\n\u001b[1;32m 258\u001b[0m 
\u001b[39mprint\u001b[39m(\n\u001b[1;32m 259\u001b[0m \u001b[39m'\u001b[39m\u001b[39mWe optimized your prompt before sending it to the model. The optimized prompt is available at result[\u001b[39m\u001b[39m\"\u001b[39m\u001b[39moptimized_prompt\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m]. If you do not want these optimizations applied, you can pass the disable_optimizations flag to your request.\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 260\u001b[0m )\n", - "File \u001b[0;32m~/workspace/aleph-open/aleph-alpha-client/aleph_alpha_client/aleph_alpha_client.py:460\u001b[0m, in \u001b[0;36mAlephAlphaClient._translate_errors\u001b[0;34m(response)\u001b[0m\n\u001b[1;32m 458\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(response\u001b[39m.\u001b[39mstatus_code, response\u001b[39m.\u001b[39mjson())\n\u001b[1;32m 459\u001b[0m \u001b[39melif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m401\u001b[39m:\n\u001b[0;32m--> 460\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mPermissionError\u001b[39;00m(response\u001b[39m.\u001b[39mstatus_code, response\u001b[39m.\u001b[39mjson())\n\u001b[1;32m 461\u001b[0m \u001b[39melif\u001b[39;00m response\u001b[39m.\u001b[39mstatus_code \u001b[39m==\u001b[39m \u001b[39m402\u001b[39m:\n\u001b[1;32m 462\u001b[0m \u001b[39mraise\u001b[39;00m QuotaError(response\u001b[39m.\u001b[39mstatus_code, response\u001b[39m.\u001b[39mjson())\n", - "\u001b[0;31mPermissionError\u001b[0m: [Errno 401] {'error': 'InvalidSignature', 'code': 'UNAUTHENTICATED'}" - ] - } - ], + "outputs": [], "source": [ "from aleph_alpha_client import ImagePrompt, AlephAlphaModel, AlephAlphaClient, CompletionRequest\n", "import os\n", @@ -330,18 +315,17 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, TokenizationRequest\n", + "from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, TokenizationRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", "# You need to choose a model with qa support and multimodal capabilities for this example.\n", - "model = \"luminous-extended\"\n", "request = TokenizationRequest(prompt=\"This is an example.\", tokens=True, token_ids=True)\n", - "response = client.tokenize(model, request=request)\n", + "response = model.tokenize(request)\n", "\n", "print(response)" ] diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index c932ada..9e7ab75 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -1,36 +1,34 @@ import pytest from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel from aleph_alpha_client.tokenization import TokenizationRequest -from tests.common import client, model_name +from tests.common import client, model_name, model -def test_tokenize(client: AlephAlphaClient, model_name: str): - response = client.tokenize(model_name, request=TokenizationRequest("Hello", True, True)) +def test_tokenize(model: AlephAlphaModel): + response = model.tokenize(request=TokenizationRequest("Hello", tokens=True, token_ids=True)) assert len(response.tokens) == 1 assert len(response.token_ids) == 1 -def test_tokenize_with_explicit_parameters(client: AlephAlphaClient, 
model_name: str): +def test_tokenize_with_client(client: AlephAlphaClient, model_name: str): response = client.tokenize(model_name, prompt="Hello", tokens=True, token_ids=True) assert len(response["tokens"]) == 1 assert len(response["token_ids"]) == 1 -def test_tokenize_fails(client: AlephAlphaClient, model_name: str): +def test_tokenize_fails(model: AlephAlphaModel): # given a client - assert model_name in map(lambda model: model["name"], client.available_models()) + assert model.model_name in map(lambda model: model["name"], model.client.available_models()) # when posting an illegal request request = TokenizationRequest("hello", False, False) # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - response = client.tokenize( - model_name, - request=request, - ) + response = model.tokenize(request) assert e.value.args[0] == 400 \ No newline at end of file From 197a30a0b1157a1beeb3e549e1c04dd95313951f Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 16:10:40 +0200 Subject: [PATCH 05/10] Add detokenize and cleanup tokenize and complete --- README.md | 11 +++++------ aleph_alpha_client/aleph_alpha_client.py | 19 +++---------------- aleph_alpha_client/aleph_alpha_model.py | 7 ++++++- aleph_alpha_client/completion.py | 18 +++++++++++++++--- readme.ipynb | 11 +++++------ tests/test_detokenize.py | 18 ++++++++---------- tests/test_tokenize.py | 4 ++-- 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 7dcdc79..531b5a8 100644 --- a/README.md +++ b/README.md @@ -259,18 +259,17 @@ print(response) ```python -from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, DetokenizationRequest +from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, DetokenizationRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + model_name = "luminous-extended" ) # You need to choose a model with qa support and multimodal capabilities for this example. -model = "luminous-extended" request = DetokenizationRequest(token_ids=[1730, 387, 300, 4377, 17]) -response = client.detokenize(model, request=request) +response = model.detokenize(request) print(response) ``` diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index ee1f73e..3868287 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -80,12 +80,11 @@ def tokenize( prompt: Optional[str] = None, tokens: bool = True, token_ids: bool = True, - request: Optional[TokenizationRequest] = None, ) -> Any: """ Tokenizes the given prompt for the given model. """ - named_request = request or TokenizationRequest(prompt or "", tokens, token_ids) + named_request = TokenizationRequest(prompt or "", tokens, token_ids) response = requests.post( self.host + "tokenize", @@ -98,30 +97,18 @@ def detokenize( self, model: str, token_ids: List[int] = [], - request: Optional[DetokenizationRequest] = None, ): """ Detokenizes the given tokens. """ - if request is None: - logging.warning( - "Calling this method with individual request parameters is deprecated. " - + "Please pass a DetokenizationRequest object as the request parameter instead." 
- ) - - named_request = request or DetokenizationRequest(token_ids) + named_request = DetokenizationRequest(token_ids) response = requests.post( self.host + "detokenize", headers=self.request_headers, json=named_request.render_as_body(model), timeout=None, ) - response_dict = self._translate_errors(response) - return ( - DetokenizationResponse.from_json(response_dict) - if request - else response_dict - ) + return self._translate_errors(response) def complete( self, diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py index f0b7462..18145b6 100644 --- a/aleph_alpha_client/aleph_alpha_model.py +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -1,5 +1,6 @@ from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.completion import CompletionRequest, CompletionResponse +from aleph_alpha_client.detokenization import DetokenizationRequest, DetokenizationResponse from aleph_alpha_client.tokenization import TokenizationRequest, TokenizationResponse @@ -16,4 +17,8 @@ def complete(self, request: CompletionRequest) -> CompletionResponse: def tokenize(self, request: TokenizationRequest) -> TokenizationResponse: response_json = self.client.tokenize(model = self.model_name, **request._asdict()) - return TokenizationResponse.from_json(response_json) \ No newline at end of file + return TokenizationResponse.from_json(response_json) + + def detokenize(self, request: DetokenizationRequest) -> DetokenizationResponse: + response_json = self.client.detokenize(model = self.model_name, **request._asdict()) + return DetokenizationResponse.from_json(response_json) \ No newline at end of file diff --git a/aleph_alpha_client/completion.py b/aleph_alpha_client/completion.py index 2c69e55..1e2f0d4 100644 --- a/aleph_alpha_client/completion.py +++ b/aleph_alpha_client/completion.py @@ -115,10 +115,22 @@ def render_as_body(self, model: str, hosting: str) -> Dict[str, Any]: } +class CompletionResult(NamedTuple): + log_probs: Optional[Sequence[Mapping[str, Optional[float]]]] = None + completion: Optional[str] = None + completion_tokens: Optional[Sequence[str]] = None + finish_reason: Optional[str] = None + message: Optional[str] = None + + class CompletionResponse(NamedTuple): - model_version: Optional[str] = None - completions: Optional[Mapping[str, Any]] = None + model_version: str + completions: Sequence[CompletionResult] + optimized_prompt: Optional[Sequence[str]] = None @staticmethod def from_json(json: Dict[str, Any]) -> "CompletionResponse": - return CompletionResponse(**json) + return CompletionResponse( + model_version=json["model_version"], + completions=[CompletionResult(**item) for item in json["completions"]], + optimized_prompt=json.get("optimized_prompt")) diff --git a/readme.ipynb b/readme.ipynb index 11f1d61..70536b7 100644 --- a/readme.ipynb +++ b/readme.ipynb @@ -344,18 +344,17 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, DetokenizationRequest\n", + "from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, DetokenizationRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", "# You need to choose a model with qa support and multimodal capabilities for this example.\n", - 
"model = \"luminous-extended\"\n", "request = DetokenizationRequest(token_ids=[1730, 387, 300, 4377, 17])\n", - "response = client.detokenize(model, request=request)\n", + "response = model.detokenize(request)\n", "\n", "print(response)" ] diff --git a/tests/test_detokenize.py b/tests/test_detokenize.py index d51555e..7e487a5 100644 --- a/tests/test_detokenize.py +++ b/tests/test_detokenize.py @@ -1,34 +1,32 @@ import pytest from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel from aleph_alpha_client.detokenization import DetokenizationRequest -from tests.common import client, model_name +from tests.common import client, model_name, model -def test_detokenize(client: AlephAlphaClient, model_name: str): - response = client.detokenize(model_name, request=DetokenizationRequest([4711])) +def test_detokenize(model: AlephAlphaModel): + response = model.detokenize(DetokenizationRequest([4711])) assert response.result is not None -def test_detokenize_with_explicit_parameters(client: AlephAlphaClient, model_name: str): +def test_detokenize_with_client(client: AlephAlphaClient, model_name: str): response = client.detokenize(model_name, token_ids=[4711, 42]) assert response["result"] is not None -def test_detokenize_fails(client: AlephAlphaClient, model_name: str): +def test_detokenize_fails(model: AlephAlphaModel): # given a client - assert model_name in map(lambda model: model["name"], client.available_models()) + assert model.model_name in map(lambda model: model["name"], model.client.available_models()) # when posting an illegal request request = DetokenizationRequest([]) # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - response = client.detokenize( - model_name, - request=request, - ) + response = model.detokenize(request=request) e.value.args[0] == 400 \ No newline at end of file diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index 9e7ab75..064e6af 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -9,8 +9,8 @@ def test_tokenize(model: AlephAlphaModel): response = model.tokenize(request=TokenizationRequest("Hello", tokens=True, token_ids=True)) - assert len(response.tokens) == 1 - assert len(response.token_ids) == 1 + assert response.tokens and len(response.tokens) == 1 + assert response.token_ids and len(response.token_ids) == 1 def test_tokenize_with_client(client: AlephAlphaClient, model_name: str): From 0fed1758dbd408d8e9f6a3051a92fd38ab2c70b2 Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 16:24:25 +0200 Subject: [PATCH 06/10] Add embed request to model --- README.md | 25 ++++++++---------- aleph_alpha_client/aleph_alpha_client.py | 18 ++----------- aleph_alpha_client/aleph_alpha_model.py | 7 +++++- readme.ipynb | 25 ++++++++---------- tests/test_embed.py | 32 +++++++++++------------- 5 files changed, 45 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 531b5a8..a703447 100644 --- a/README.md +++ b/README.md @@ -99,17 +99,16 @@ print(result) ```python -from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EmbeddingRequest +from aleph_alpha_client import AlephAlphaModel, AlephAlphaClient, EmbeddingRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + model_name = "luminous-extended" ) -model = 
"luminous-base" request = EmbeddingRequest(prompt=["This is an example."], layers=[-1], pooling=["mean"]) -result = client.embed(model, request=request) +result = model.embed(request) print(result) ``` @@ -120,17 +119,15 @@ print(result) ```python -from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EmbeddingRequest +from aleph_alpha_client import ImagePrompt, AlephAlphaClient, AlephAlphaModel, EmbeddingRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + # You need to choose a model with multimodal capabilities for this example. + model_name = "luminous-extended" ) -# You need to choose a model with multimodal capabilities for this example. -model = "luminous-base" - url = "https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/2008-09-24_Blockbuster_in_Durham.jpg/330px-2008-09-24_Blockbuster_in_Durham.jpg" image = ImagePrompt.from_url(url) prompt = [ @@ -138,7 +135,7 @@ prompt = [ "Q: What is the name of the store?\nA:", ] request = EmbeddingRequest(prompt=prompt, layers=[-1], pooling=["mean"]) -result = client.embed(model, request=request) +result = model.embed(request) print(result) ``` diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 3868287..1b0101b 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -248,7 +248,6 @@ def embed( hosting: str = "cloud", tokens: Optional[bool] = False, type: Optional[str] = None, - request: EmbeddingRequest = None, ): """ Embeds a multi-modal prompt and returns vectors that can be used for downstream tasks (e.g. semantic similarity) and models (e.g. classifiers). @@ -285,16 +284,8 @@ def embed( type Type of the embedding (e.g. symmetric or asymmetric) - request (EmbeddingRequest, optional): - Input for the embeddings to be computed """ - if request is None: - logging.warning( - "Calling this method with individual request parameters is deprecated. " - + "Please pass an EmbeddingRequest object as the request parameter instead." 
- ) - - named_request = request or EmbeddingRequest( + named_request = EmbeddingRequest( prompt=prompt or "", layers=layers or [], pooling=pooling or [], @@ -305,12 +296,7 @@ def embed( response = requests.post( f"{self.host}embed", headers=self.request_headers, json=body ) - response_dict = self._translate_errors(response) - return ( - response_dict - if request is None - else EmbeddingResponse.from_json(response_dict) - ) + return self._translate_errors(response) def evaluate( self, diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py index 18145b6..d1e7ef7 100644 --- a/aleph_alpha_client/aleph_alpha_model.py +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -1,6 +1,7 @@ from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.completion import CompletionRequest, CompletionResponse from aleph_alpha_client.detokenization import DetokenizationRequest, DetokenizationResponse +from aleph_alpha_client.embedding import EmbeddingRequest, EmbeddingResponse from aleph_alpha_client.tokenization import TokenizationRequest, TokenizationResponse @@ -21,4 +22,8 @@ def tokenize(self, request: TokenizationRequest) -> TokenizationResponse: def detokenize(self, request: DetokenizationRequest) -> DetokenizationResponse: response_json = self.client.detokenize(model = self.model_name, **request._asdict()) - return DetokenizationResponse.from_json(response_json) \ No newline at end of file + return DetokenizationResponse.from_json(response_json) + + def embed(self, request: EmbeddingRequest) -> EmbeddingResponse: + response_json = self.client.embed(model = self.model_name, hosting=self.hosting, **request._asdict()) + return EmbeddingResponse.from_json(response_json) diff --git a/readme.ipynb b/readme.ipynb index 70536b7..7855208 100644 --- a/readme.ipynb +++ b/readme.ipynb @@ -133,17 +133,16 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EmbeddingRequest\n", + "from aleph_alpha_client import AlephAlphaModel, AlephAlphaClient, EmbeddingRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", - "model = \"luminous-base\"\n", "request = EmbeddingRequest(prompt=[\"This is an example.\"], layers=[-1], pooling=[\"mean\"])\n", - "result = client.embed(model, request=request)\n", + "result = model.embed(request)\n", "\n", "print(result)" ] @@ -162,17 +161,15 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EmbeddingRequest\n", + "from aleph_alpha_client import ImagePrompt, AlephAlphaClient, AlephAlphaModel, EmbeddingRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " # You need to choose a model with multimodal capabilities for this example.\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", - "# You need to choose a model with multimodal capabilities for this example.\n", - "model = \"luminous-base\"\n", - "\n", "url = 
\"https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/2008-09-24_Blockbuster_in_Durham.jpg/330px-2008-09-24_Blockbuster_in_Durham.jpg\"\n", "image = ImagePrompt.from_url(url)\n", "prompt = [\n", @@ -180,7 +177,7 @@ " \"Q: What is the name of the store?\\nA:\",\n", "]\n", "request = EmbeddingRequest(prompt=prompt, layers=[-1], pooling=[\"mean\"])\n", - "result = client.embed(model, request=request)\n", + "result = model.embed(request)\n", "\n", "print(result)" ] diff --git a/tests/test_embed.py b/tests/test_embed.py index 62a653e..b4090a2 100644 --- a/tests/test_embed.py +++ b/tests/test_embed.py @@ -1,23 +1,24 @@ from typing import List import pytest from aleph_alpha_client import AlephAlphaClient, EmbeddingRequest -from tests.common import client, model_name +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel +from tests.common import client, model_name, model -def test_embed(client: AlephAlphaClient, model_name: str): +def test_embed(model: AlephAlphaModel): request = EmbeddingRequest( prompt=["hello"], layers=[0, -1], pooling=["mean", "max"] ) - result = client.embed(model=model_name, request=request) + result = model.embed(request=request) assert result.model_version is not None - assert len(result.embeddings) == len(request.pooling) * len(request.layers) + assert result.embeddings and len(result.embeddings) == len(request.pooling) * len(request.layers) assert result.tokens is None -def test_embed_with_explicit_parameters(client: AlephAlphaClient, model_name: str): +def test_embed_with_client(client: AlephAlphaClient, model_name: str): layers = [0, -1] pooling = ["mean", "max"] prompt = ["hello"] @@ -30,9 +31,7 @@ def test_embed_with_explicit_parameters(client: AlephAlphaClient, model_name: st assert result["tokens"] is None -def test_embedding_of_one_token_aggregates_identically( - client: AlephAlphaClient, model_name: str -): +def test_embedding_of_one_token_aggregates_identically(model: AlephAlphaModel): request = EmbeddingRequest( prompt=[ "hello" @@ -41,35 +40,34 @@ def test_embedding_of_one_token_aggregates_identically( pooling=["mean", "max"], ) - result = client.embed(model=model_name, request=request) + result = model.embed(request) assert ( - result.embeddings[("layer_0", "mean")] == result.embeddings[("layer_0", "max")] + result.embeddings and result.embeddings[("layer_0", "mean")] == result.embeddings[("layer_0", "max")] ) -def test_embed_with_tokens(client: AlephAlphaClient, model_name: str): - +def test_embed_with_tokens(model: AlephAlphaModel): request = EmbeddingRequest( prompt=["abc"], layers=[-1], pooling=["mean"], tokens=True ) - result = client.embed(model=model_name, request=request) + result = model.embed(request) assert result.model_version is not None - assert len(result.embeddings) == len(request.pooling) * len(request.layers) + assert result.embeddings and len(result.embeddings) == len(request.pooling) * len(request.layers) assert result.tokens is not None -def test_failing_embedding_request(client: AlephAlphaClient, model_name: str): +def test_failing_embedding_request(model: AlephAlphaModel): # given a client - assert model_name in (model["name"] for model in client.available_models()) + assert model.model_name in (model["name"] for model in model.client.available_models()) # when posting an illegal request request = EmbeddingRequest(prompt=["abc"], layers=[0, 1, 2], pooling=["mean"]) # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - client.embed(model=model_name, 
request=request) + model.embed(request) assert e.value.args[0] == 400 From 7c5ea1c5dbe256f1c2a0362de22ef9e93d29dbe7 Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 16:31:42 +0200 Subject: [PATCH 07/10] Add evaluate request to model --- README.md | 25 +++++++++++------------- aleph_alpha_client/aleph_alpha_client.py | 19 ++---------------- aleph_alpha_client/aleph_alpha_model.py | 5 +++++ readme.ipynb | 25 +++++++++++------------- tests/test_evaluate.py | 19 ++++++++---------- 5 files changed, 37 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index a703447..2547c42 100644 --- a/README.md +++ b/README.md @@ -48,17 +48,16 @@ print(result.completions[0]["completion"]) ```python -from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EvaluationRequest +from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, EvaluationRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + model_name = "luminous-extended" ) -model = "luminous-base" request = EvaluationRequest(prompt="The api works", completion_expected=" well") -result = client.evaluate(model, request=request) +result = model.evaluate(request) print(result) @@ -70,17 +69,15 @@ print(result) ```python -from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EvaluationRequest +from aleph_alpha_client import ImagePrompt, AlephAlphaClient, AlephAlphaModel, EvaluationRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + # You need to choose a model with multimodal capabilities for this example. + model_name = "luminous-extended" ) -# You need to choose a model with multimodal capabilities for this example. -model = "luminous-base" - url = "https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/2008-09-24_Blockbuster_in_Durham.jpg/330px-2008-09-24_Blockbuster_in_Durham.jpg" image = ImagePrompt.from_url(url) prompt = [ @@ -88,7 +85,7 @@ prompt = [ "Q: What is the name of the store?\nA:", ] request = EvaluationRequest(prompt=prompt, completion_expected=" Blockbuster Video") -result = client.evaluate(model, request=request) +result = model.evaluate(request) print(result) ``` diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 1b0101b..8bc0de0 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -304,7 +304,6 @@ def evaluate( completion_expected: str = None, hosting: str = "cloud", prompt: Union[str, List[Union[str, ImagePrompt]]] = "", - request: EvaluationRequest = None, ): """ Evaluates the model's likelihood to produce a completion given a prompt. @@ -323,18 +322,9 @@ def evaluate( prompt (str, optional, default ""): The text to be completed. Unconditional completion can be used with an empty string (default). The prompt may contain a zero shot or few shot task. - - request (EvaluationRequest, optional): - Input for the evaluation to be computed """ - if request is None: - logging.warning( - "Calling this method with individual request parameters is deprecated. " - + "Please pass an EvaluationRequest object as the request parameter instead." 
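For reference, each request method added to AlephAlphaModel in this series forwards its NamedTuple request to the underlying client by expanding it with `._asdict()`. Below is a minimal, self-contained sketch of that forwarding pattern; `StubClient` and `Model` are illustrative stand-ins, not the library's real classes:

```python
from typing import NamedTuple


class EvaluationRequest(NamedTuple):
    prompt: str
    completion_expected: str


class StubClient:
    # Stand-in for AlephAlphaClient.evaluate: it accepts the same keyword
    # arguments that the request tuple carries as fields.
    def evaluate(self, model, hosting, prompt, completion_expected):
        return {"model": model, "hosting": hosting,
                "prompt": prompt, "completion_expected": completion_expected}


class Model:
    def __init__(self, client, model_name, hosting="cloud"):
        self.client = client
        self.model_name = model_name
        self.hosting = hosting

    def evaluate(self, request):
        # _asdict() turns the request's fields into keyword arguments for the
        # client call, the same forwarding used in aleph_alpha_model.py.
        return self.client.evaluate(model=self.model_name, hosting=self.hosting,
                                     **request._asdict())


print(Model(StubClient(), "luminous-extended").evaluate(
    EvaluationRequest(prompt="The api works", completion_expected=" well")))
```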
- ) - - named_request = request or EvaluationRequest( + named_request = EvaluationRequest( prompt=prompt, completion_expected=completion_expected or "" ) response = requests.post( @@ -342,12 +332,7 @@ def evaluate( headers=self.request_headers, json=named_request.render_as_body(model, hosting), ) - response_dict = self._translate_errors(response) - return ( - response_dict - if request is None - else EvaluationResponse.from_json(response_dict) - ) + return self._translate_errors(response) def qa( self, diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py index d1e7ef7..4969f41 100644 --- a/aleph_alpha_client/aleph_alpha_model.py +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -2,6 +2,7 @@ from aleph_alpha_client.completion import CompletionRequest, CompletionResponse from aleph_alpha_client.detokenization import DetokenizationRequest, DetokenizationResponse from aleph_alpha_client.embedding import EmbeddingRequest, EmbeddingResponse +from aleph_alpha_client.evaluation import EvaluationRequest, EvaluationResponse from aleph_alpha_client.tokenization import TokenizationRequest, TokenizationResponse @@ -27,3 +28,7 @@ def detokenize(self, request: DetokenizationRequest) -> DetokenizationResponse: def embed(self, request: EmbeddingRequest) -> EmbeddingResponse: response_json = self.client.embed(model = self.model_name, hosting=self.hosting, **request._asdict()) return EmbeddingResponse.from_json(response_json) + + def evaluate(self, request: EvaluationRequest) -> EvaluationResponse: + response_json = self.client.evaluate(model = self.model_name, hosting=self.hosting, **request._asdict()) + return EvaluationResponse.from_json(response_json) diff --git a/readme.ipynb b/readme.ipynb index 7855208..f7fec4f 100644 --- a/readme.ipynb +++ b/readme.ipynb @@ -67,17 +67,16 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EvaluationRequest\n", + "from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, EvaluationRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", - "model = \"luminous-base\"\n", "request = EvaluationRequest(prompt=\"The api works\", completion_expected=\" well\")\n", - "result = client.evaluate(model, request=request)\n", + "result = model.evaluate(request)\n", "\n", "print(result)\n" ] @@ -96,17 +95,15 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import ImagePrompt, AlephAlphaClient, EvaluationRequest\n", + "from aleph_alpha_client import ImagePrompt, AlephAlphaClient, AlephAlphaModel, EvaluationRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " # You need to choose a model with multimodal capabilities for this example.\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", - "# You need to choose a model with multimodal capabilities for this example.\n", - "model = \"luminous-base\"\n", - "\n", "url = 
\"https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/2008-09-24_Blockbuster_in_Durham.jpg/330px-2008-09-24_Blockbuster_in_Durham.jpg\"\n", "image = ImagePrompt.from_url(url)\n", "prompt = [\n", @@ -114,7 +111,7 @@ " \"Q: What is the name of the store?\\nA:\",\n", "]\n", "request = EvaluationRequest(prompt=prompt, completion_expected=\" Blockbuster Video\")\n", - "result = client.evaluate(model, request=request)\n", + "result = model.evaluate(request)\n", "\n", "print(result)" ] diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py index d878bcf..7a6648a 100644 --- a/tests/test_evaluate.py +++ b/tests/test_evaluate.py @@ -2,30 +2,31 @@ from typing import List import pytest from aleph_alpha_client import AlephAlphaClient +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel from aleph_alpha_client.evaluation import EvaluationRequest -from tests.common import client, model_name +from tests.common import client, model_name, model -def test_evaluate(client: AlephAlphaClient, model_name: str): +def test_evaluate(model: AlephAlphaModel): request = EvaluationRequest(prompt=["hello"], completion_expected="world") - result = client.evaluate(model=model_name, request=request) + result = model.evaluate(request) assert result.model_version is not None assert result.result is not None -def test_evaluate_with_explicit_parameters(client: AlephAlphaClient, model_name: str): +def test_evaluate_with_client(client: AlephAlphaClient, model_name: str): result = client.evaluate(model_name, prompt="hello", completion_expected="world") assert result["model_version"] is not None assert result["result"] is not None -def test_evaluate_fails(client: AlephAlphaClient, model_name: str): +def test_evaluate_fails(model: AlephAlphaModel): # given a client - assert model_name in map(lambda model: model["name"], client.available_models()) + assert model.model_name in map(lambda model: model["name"], model.client.available_models()) # when posting an illegal request request = EvaluationRequest( @@ -35,10 +36,6 @@ def test_evaluate_fails(client: AlephAlphaClient, model_name: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - response = client.evaluate( - model_name, - hosting="cloud", - request=request, - ) + response = model.evaluate(request=request) assert e.value.args[0] == 400 From 2003c8c19c1dcb443fcf361b3a85eaa30608c710 Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 16:42:24 +0200 Subject: [PATCH 08/10] Add qa request to model --- README.md | 42 ++++++++++-------------- aleph_alpha_client/aleph_alpha_client.py | 17 ++-------- aleph_alpha_client/aleph_alpha_model.py | 5 +++ readme.ipynb | 42 ++++++++++-------------- tests/test_qa.py | 35 +++++++------------- 5 files changed, 55 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index 2547c42..d2aa57d 100644 --- a/README.md +++ b/README.md @@ -143,17 +143,15 @@ print(result) ```python -from aleph_alpha_client import Document, AlephAlphaClient, QaRequest +from aleph_alpha_client import Document, AlephAlphaClient, AlephAlphaModel, QaRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + # You need to choose a model with qa support for this example. + model_name = "luminous-extended" ) -# You need to choose a model with qa support for this example. 
-model = "luminous-extended" - docx_file = "./tests/sample.docx" document = Document.from_docx_file(docx_file) @@ -162,7 +160,7 @@ request = QaRequest( documents = [document] ) -result = client.qa(model, request=request) +result = model.qa(request) print(result) ``` @@ -172,17 +170,15 @@ print(result) ```python -from aleph_alpha_client import Document, AlephAlphaClient, QaRequest +from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, QaRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + # You need to choose a model with qa support for this example. + model_name = "luminous-extended" ) -# You need to choose a model with qa support for this example. -model = "luminous-extended" - prompt = "In imperative programming, a computer program is a sequence of instructions in a programming language that a computer can execute or interpret." document = Document.from_text(prompt) @@ -191,7 +187,7 @@ request = QaRequest( documents = [document], ) -result = client.qa(model, request=request) +result = model.qa(request) print(result) ``` @@ -202,17 +198,15 @@ print(result) ```python -from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, QaRequest +from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, AlephAlphaModel, QaRequest import os -client = AlephAlphaClient( - host="https://api.aleph-alpha.com", - token=os.getenv("AA_TOKEN") +model = AlephAlphaModel( + AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN")), + # You need to choose a model with qa support for this example. + model_name = "luminous-extended" ) -# You need to choose a model with qa support and multimodal capabilities for this example. -model = "luminous-extended" - url = "https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/2008-09-24_Blockbuster_in_Durham.jpg/330px-2008-09-24_Blockbuster_in_Durham.jpg" image = ImagePrompt.from_url(url) prompt = [image] @@ -223,7 +217,7 @@ request = QaRequest ( documents = [document] ) -result = client.qa(model, request=request) +result = model.qa(request) print(result) ``` diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 8bc0de0..5e11e58 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -345,7 +345,6 @@ def qa( max_answers: int = 0, min_score: float = 0.0, hosting: str = "cloud", - request: Optional[QaRequest] = None, ): """ Answers a question about a prompt. @@ -358,19 +357,8 @@ def qa( Specifies where the computation will take place. This defaults to "cloud", meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. Check available_models() for available hostings. - - request (QaRequest, optional): - Input for the answers to be computed - """ - - if request is None: - logging.warning( - "Calling this method with individual request parameters is deprecated. " - + "Please pass an QaRequest object as the request parameter instead." 
- ) - - named_request = request or QaRequest( + named_request = QaRequest( query or "", documents or [], maximum_tokens, @@ -386,8 +374,7 @@ def qa( json=named_request.render_as_body(model, hosting), timeout=None, ) - response_json = self._translate_errors(response) - return response_json if request is None else QaResponse.from_json(response_json) + return self._translate_errors(response) def _explain( self, model: str, request: ExplanationRequest, hosting: Optional[str] = None diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py index 4969f41..b8954be 100644 --- a/aleph_alpha_client/aleph_alpha_model.py +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -3,6 +3,7 @@ from aleph_alpha_client.detokenization import DetokenizationRequest, DetokenizationResponse from aleph_alpha_client.embedding import EmbeddingRequest, EmbeddingResponse from aleph_alpha_client.evaluation import EvaluationRequest, EvaluationResponse +from aleph_alpha_client.qa import QaRequest, QaResponse from aleph_alpha_client.tokenization import TokenizationRequest, TokenizationResponse @@ -32,3 +33,7 @@ def embed(self, request: EmbeddingRequest) -> EmbeddingResponse: def evaluate(self, request: EvaluationRequest) -> EvaluationResponse: response_json = self.client.evaluate(model = self.model_name, hosting=self.hosting, **request._asdict()) return EvaluationResponse.from_json(response_json) + + def qa(self, request: QaRequest) -> QaResponse: + response_json = self.client.qa(model = self.model_name, hosting=self.hosting, **request._asdict()) + return QaResponse.from_json(response_json) diff --git a/readme.ipynb b/readme.ipynb index f7fec4f..3228837 100644 --- a/readme.ipynb +++ b/readme.ipynb @@ -193,17 +193,15 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import Document, AlephAlphaClient, QaRequest\n", + "from aleph_alpha_client import Document, AlephAlphaClient, AlephAlphaModel, QaRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " # You need to choose a model with qa support for this example.\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", - "# You need to choose a model with qa support for this example.\n", - "model = \"luminous-extended\"\n", - "\n", "docx_file = \"./tests/sample.docx\"\n", "document = Document.from_docx_file(docx_file)\n", "\n", @@ -212,7 +210,7 @@ " documents = [document]\n", ")\n", "\n", - "result = client.qa(model, request=request)\n", + "result = model.qa(request)\n", "\n", "print(result)" ] @@ -231,17 +229,15 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import Document, AlephAlphaClient, QaRequest\n", + "from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, QaRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " # You need to choose a model with qa support for this example.\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", - "# You need to choose a model with qa support for this example.\n", - "model = \"luminous-extended\"\n", - "\n", "prompt = \"In imperative programming, a computer program is a sequence of instructions 
in a programming language that a computer can execute or interpret.\"\n", "document = Document.from_text(prompt)\n", "\n", @@ -250,7 +246,7 @@ " documents = [document],\n", ")\n", "\n", - "result = client.qa(model, request=request)\n", + "result = model.qa(request)\n", "\n", "print(result)" ] @@ -269,17 +265,15 @@ "metadata": {}, "outputs": [], "source": [ - "from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, QaRequest\n", + "from aleph_alpha_client import Document, ImagePrompt, AlephAlphaClient, AlephAlphaModel, QaRequest\n", "import os\n", "\n", - "client = AlephAlphaClient(\n", - " host=\"https://api.aleph-alpha.com\",\n", - " token=os.getenv(\"AA_TOKEN\")\n", + "model = AlephAlphaModel(\n", + " AlephAlphaClient(host=\"https://api.aleph-alpha.com\", token=os.getenv(\"AA_TOKEN\")),\n", + " # You need to choose a model with qa support for this example.\n", + " model_name = \"luminous-extended\"\n", ")\n", "\n", - "# You need to choose a model with qa support and multimodal capabilities for this example.\n", - "model = \"luminous-extended\"\n", - "\n", "url = \"https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/2008-09-24_Blockbuster_in_Durham.jpg/330px-2008-09-24_Blockbuster_in_Durham.jpg\"\n", "image = ImagePrompt.from_url(url)\n", "prompt = [image]\n", @@ -290,7 +284,7 @@ " documents = [document]\n", ")\n", "\n", - "result = client.qa(model, request=request)\n", + "result = model.qa(request)\n", "\n", "print(result)" ] diff --git a/tests/test_qa.py b/tests/test_qa.py index d377445..85cfc5b 100644 --- a/tests/test_qa.py +++ b/tests/test_qa.py @@ -1,14 +1,15 @@ import pytest from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel from aleph_alpha_client.document import Document from aleph_alpha_client.qa import QaRequest -from tests.common import client, model_name +from tests.common import client, model_name, model -def test_qa(client: AlephAlphaClient, model_name: str): +def test_qa(model: AlephAlphaModel): # given a client - assert model_name in map(lambda model: model["name"], client.available_models()) + assert model.model_name in map(lambda model: model["name"], model.client.available_models()) # when posting a QA request with a QaRequest object request = QaRequest( @@ -16,20 +17,16 @@ def test_qa(client: AlephAlphaClient, model_name: str): documents=[Document.from_prompt(["Andreas likes pizza."])], ) - response = client.qa( - model_name, - hosting="cloud", - request=request, - ) + response = model.qa(request) # the response should exist and be in the form of a named tuple class assert len(response.answers) == 1 assert response.model_version is not None -def test_qa_no_answer_found(client: AlephAlphaClient, model_name: str): +def test_qa_no_answer_found(model: AlephAlphaModel): # given a client - assert model_name in map(lambda model: model["name"], client.available_models()) + assert model.model_name in map(lambda model: model["name"], model.client.available_models()) # when posting a QA request with a QaRequest object request = QaRequest( @@ -37,18 +34,14 @@ def test_qa_no_answer_found(client: AlephAlphaClient, model_name: str): documents=[], ) - response = client.qa( - model_name, - hosting="cloud", - request=request, - ) + response = model.qa(request) # the response should exist and be in the form of a named tuple class assert len(response.answers) == 0 assert response.model_version is not None -def test_qa_with_explicit_parameters(client: AlephAlphaClient, model_name: str): 
+def test_qa_with_client(client: AlephAlphaClient, model_name: str): # given a client assert model_name in map(lambda model: model["name"], client.available_models()) @@ -65,9 +58,9 @@ def test_qa_with_explicit_parameters(client: AlephAlphaClient, model_name: str): assert response["model_version"] is not None -def test_qa_fails(client: AlephAlphaClient, model_name: str): +def test_qa_fails(model: AlephAlphaModel): # given a client - assert model_name in map(lambda model: model["name"], client.available_models()) + assert model.model_name in map(lambda model: model["name"], model.client.available_models()) # when posting an illegal request request = QaRequest( @@ -77,10 +70,6 @@ def test_qa_fails(client: AlephAlphaClient, model_name: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - response = client.qa( - model_name, - hosting="cloud", - request=request, - ) + response = model.qa(request) assert e.value.args[0] == 400 From ccc00a56efd2bb4d85199ae3d70c317777678645 Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 16:48:59 +0200 Subject: [PATCH 09/10] Add explain request to model --- aleph_alpha_client/aleph_alpha_client.py | 3 +-- aleph_alpha_client/aleph_alpha_model.py | 5 +++++ tests/test_explanation.py | 17 +++++++---------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 5e11e58..008649c 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -383,8 +383,7 @@ def _explain( response = requests.post( f"{self.host}explain", headers=self.request_headers, json=body ) - response_dict = self._translate_errors(response) - return response_dict + return self._translate_errors(response) @staticmethod def _translate_errors(response): diff --git a/aleph_alpha_client/aleph_alpha_model.py b/aleph_alpha_client/aleph_alpha_model.py index b8954be..fd030e1 100644 --- a/aleph_alpha_client/aleph_alpha_model.py +++ b/aleph_alpha_client/aleph_alpha_model.py @@ -1,8 +1,10 @@ +from typing import Any, Mapping from aleph_alpha_client.aleph_alpha_client import AlephAlphaClient from aleph_alpha_client.completion import CompletionRequest, CompletionResponse from aleph_alpha_client.detokenization import DetokenizationRequest, DetokenizationResponse from aleph_alpha_client.embedding import EmbeddingRequest, EmbeddingResponse from aleph_alpha_client.evaluation import EvaluationRequest, EvaluationResponse +from aleph_alpha_client.explanation import ExplanationRequest from aleph_alpha_client.qa import QaRequest, QaResponse from aleph_alpha_client.tokenization import TokenizationRequest, TokenizationResponse @@ -37,3 +39,6 @@ def evaluate(self, request: EvaluationRequest) -> EvaluationResponse: def qa(self, request: QaRequest) -> QaResponse: response_json = self.client.qa(model = self.model_name, hosting=self.hosting, **request._asdict()) return QaResponse.from_json(response_json) + + def _explain(self, request: ExplanationRequest) -> Mapping[str, Any]: + return self.client._explain(model = self.model_name, hosting=self.hosting, request=request) diff --git a/tests/test_explanation.py b/tests/test_explanation.py index 9553900..30ae62a 100644 --- a/tests/test_explanation.py +++ b/tests/test_explanation.py @@ -1,10 +1,11 @@ import pytest from aleph_alpha_client import AlephAlphaClient, ExplanationRequest +from aleph_alpha_client.aleph_alpha_model import AlephAlphaModel -from tests.common 
import client, model_name +from tests.common import client, model_name, model -def test_explanation(client: AlephAlphaClient, model_name: str): +def test_explanation(model: AlephAlphaModel): request = ExplanationRequest( prompt=["An apple a day"], @@ -13,15 +14,15 @@ def test_explanation(client: AlephAlphaClient, model_name: str): suppression_factor=0.1, ) - explanation = client._explain(model=model_name, request=request, hosting=None) + explanation = model._explain(request) # List is true if not None and not empty assert explanation["result"] -def test_explain_fails(client: AlephAlphaClient, model_name: str): +def test_explain_fails(model: AlephAlphaModel): # given a client - assert model_name in map(lambda model: model["name"], client.available_models()) + assert model.model_name in map(lambda model: model["name"], model.client.available_models()) # when posting an illegal request request = ExplanationRequest( @@ -34,10 +35,6 @@ def test_explain_fails(client: AlephAlphaClient, model_name: str): # then we expect an exception tue to a bad request response from the API with pytest.raises(ValueError) as e: - response = client._explain( - model_name, - hosting="cloud", - request=request, - ) + response = model._explain(request) assert e.value.args[0] == 400 From ebf6f2e3ab8e5eab8748a91cabceaf14e5350a9f Mon Sep 17 00:00:00 2001 From: Volker Stampa Date: Mon, 27 Jun 2022 17:24:08 +0200 Subject: [PATCH 10/10] Revert back to original client implementation This ensures backwards compatibility. Just the assertion are removed to avoid duplication with server side checks --- aleph_alpha_client/aleph_alpha_client.py | 262 +++++++++++++---------- 1 file changed, 152 insertions(+), 110 deletions(-) diff --git a/aleph_alpha_client/aleph_alpha_client.py b/aleph_alpha_client/aleph_alpha_client.py index 008649c..1d8a446 100644 --- a/aleph_alpha_client/aleph_alpha_client.py +++ b/aleph_alpha_client/aleph_alpha_client.py @@ -1,22 +1,13 @@ from socket import timeout -from typing import Any, List, Mapping, Optional, Dict, Sequence, Type, Union +from typing import List, Optional, Dict, Sequence, Union import requests import logging import aleph_alpha_client -from aleph_alpha_client.completion import CompletionRequest, CompletionResponse -from aleph_alpha_client.detokenization import ( - DetokenizationRequest, - DetokenizationResponse, -) from aleph_alpha_client.document import Document -from aleph_alpha_client.embedding import EmbeddingRequest, EmbeddingResponse -from aleph_alpha_client.evaluation import EvaluationRequest, EvaluationResponse from aleph_alpha_client.explanation import ExplanationRequest from aleph_alpha_client.image import ImagePrompt -from aleph_alpha_client.prompt import _to_serializable_prompt -from aleph_alpha_client.qa import QaRequest, QaResponse -from aleph_alpha_client.tokenization import TokenizationResponse, TokenizationRequest +from aleph_alpha_client.prompt import _to_prompt_item, _to_serializable_prompt POOLING_OPTIONS = ["mean", "max", "last_token", "abs_max"] @@ -75,37 +66,34 @@ def available_models(self): return self._translate_errors(response) def tokenize( - self, - model: str, - prompt: Optional[str] = None, - tokens: bool = True, - token_ids: bool = True, - ) -> Any: + self, model: str, prompt: str, tokens: bool = True, token_ids: bool = True + ): """ Tokenizes the given prompt for the given model. 
""" - named_request = TokenizationRequest(prompt or "", tokens, token_ids) - + payload = { + "model": model, + "prompt": prompt, + "tokens": tokens, + "token_ids": token_ids, + } response = requests.post( self.host + "tokenize", headers=self.request_headers, - json=named_request.render_as_body(model), + json=payload, + timeout=None, ) return self._translate_errors(response) - def detokenize( - self, - model: str, - token_ids: List[int] = [], - ): + def detokenize(self, model: str, token_ids: List[int]): """ Detokenizes the given tokens. """ - named_request = DetokenizationRequest(token_ids) + payload = {"model": model, "token_ids": token_ids} response = requests.post( self.host + "detokenize", headers=self.request_headers, - json=named_request.render_as_body(model), + json=payload, timeout=None, ) return self._translate_errors(response) @@ -113,23 +101,23 @@ def detokenize( def complete( self, model: str, - prompt: Optional[List[Union[str, ImagePrompt]]] = None, + prompt: Union[str, List[Union[str, ImagePrompt]]] = "", hosting: str = "cloud", - maximum_tokens: int = 64, - temperature: float = 0.0, - top_k: int = 0, - top_p: float = 0.0, - presence_penalty: float = 0.0, - frequency_penalty: float = 0.0, - repetition_penalties_include_prompt: bool = False, - use_multiplicative_presence_penalty: bool = False, + maximum_tokens: Optional[int] = 64, + temperature: Optional[float] = 0.0, + top_k: Optional[int] = 0, + top_p: Optional[float] = 0.0, + presence_penalty: Optional[float] = 0.0, + frequency_penalty: Optional[float] = 0.0, + repetition_penalties_include_prompt: Optional[bool] = False, + use_multiplicative_presence_penalty: Optional[bool] = False, best_of: Optional[int] = None, - n: int = 1, + n: Optional[int] = 1, logit_bias: Optional[Dict[int, float]] = None, log_probs: Optional[int] = None, stop_sequences: Optional[List[str]] = None, - tokens: bool = False, - disable_optimizations: bool = False, + tokens: Optional[bool] = False, + disable_optimizations: Optional[bool] = False, ): """ Generates samples from a prompt. @@ -147,12 +135,7 @@ def complete( Check available_models() for available hostings. maximum_tokens (int, optional, default 64): - The maximum number of tokens to be generated. - Completion will terminate after the maximum number of tokens is reached. - Increase this value to generate longer texts. A text is split into tokens. - Usually there are more tokens than words. - The maximum supported number of tokens depends on the model (for luminous-base, it may not exceed 2048 tokens). - The prompt's tokens plus the maximum_tokens request must not exceed this number. + The maximum number of tokens to be generated. Completion will terminate after the maximum number of tokens is reached. Increase this value to generate longer texts. A text is split into tokens. Usually there are more tokens than words. The summed number of tokens of prompt and maximum_tokens depends on the model (for luminous-base, it may not exceed 2048 tokens). temperature (float, optional, default 0.0) A higher sampling temperature encourages the model to produce less probable outputs ("be more creative"). Values are expected in a range from 0.0 to 1.0. Try high values (e.g. 0.9) for a more "creative" response and the default 0.0 for a well defined and repeatable answer. @@ -206,29 +189,44 @@ def complete( Our goal is to improve your results while using our API. But you can always pass disable_optimizations: true and we will leave your prompt and completion untouched. 
""" - named_request = CompletionRequest( - prompt=prompt or [""], - maximum_tokens=maximum_tokens, - temperature=temperature, - top_k=top_k, - top_p=top_p, - presence_penalty=presence_penalty, - frequency_penalty=frequency_penalty, - best_of=best_of, - n=n, - logit_bias=logit_bias, - log_probs=log_probs, - repetition_penalties_include_prompt=repetition_penalties_include_prompt, - use_multiplicative_presence_penalty=use_multiplicative_presence_penalty, - stop_sequences=stop_sequences, - tokens=tokens, - disable_optimizations=disable_optimizations, - ) + # validate data types + if not isinstance(model, str): + raise ValueError("model must be a string") + + if isinstance(temperature, int): + temperature = float(temperature) + if isinstance(top_p, int): + top_p = float(top_p) + if isinstance(presence_penalty, int): + presence_penalty = float(presence_penalty) + if isinstance(frequency_penalty, int): + frequency_penalty = float(frequency_penalty) + + payload = { + "model": model, + "prompt": _to_serializable_prompt(prompt=prompt), + "hosting": hosting, + "maximum_tokens": maximum_tokens, + "temperature": temperature, + "top_k": top_k, + "top_p": top_p, + "presence_penalty": presence_penalty, + "frequency_penalty": frequency_penalty, + "best_of": best_of, + "n": n, + "logit_bias": logit_bias, + "log_probs": log_probs, + "repetition_penalties_include_prompt": repetition_penalties_include_prompt, + "use_multiplicative_presence_penalty": use_multiplicative_presence_penalty, + "stop_sequences": stop_sequences, + "tokens": tokens, + "disable_optimizations": disable_optimizations, + } response = requests.post( self.host + "complete", headers=self.request_headers, - json=named_request.render_as_body(model, hosting), + json=payload, timeout=None, ) response_json = self._translate_errors(response) @@ -242,15 +240,15 @@ def complete( def embed( self, model, - prompt: Union[str, Sequence[Union[str, ImagePrompt]]] = None, - pooling: Optional[List[str]] = None, - layers: Optional[List[int]] = None, + prompt: Union[str, Sequence[Union[str, ImagePrompt]]], + pooling: List[str], + layers: List[int], hosting: str = "cloud", tokens: Optional[bool] = False, type: Optional[str] = None, ): """ - Embeds a multi-modal prompt and returns vectors that can be used for downstream tasks (e.g. semantic similarity) and models (e.g. classifiers). + Embeds a text and returns vectors that can be used for downstream tasks (e.g. semantic similarity) and models (e.g. classifiers). Parameters: model (str, required): @@ -259,6 +257,12 @@ def embed( prompt (str, required): The text to be embedded. + layers (List[int], required): + A list of layer indices from which to return embeddings. + * Index 0 corresponds to the word embeddings used as input to the first transformer layer + * Index 1 corresponds to the hidden state as output by the first transformer layer, index 2 to the output of the second layer etc. + * Index -1 corresponds to the last transformer layer (not the language modelling head), index -2 to the second last layer etc. + pooling (List[str]) Pooling operation to use. Pooling operations include: @@ -267,12 +271,6 @@ def embed( * last_token: just use the last token * abs_max: aggregate token embeddings across the sequence dimension using a maximum of absolute values - layers (List[int], required): - A list of layer indices from which to return embeddings. 
- * Index 0 corresponds to the word embeddings used as input to the first transformer layer - * Index 1 corresponds to the hidden state as output by the first transformer layer, index 2 to the output of the second layer etc. - * Index -1 corresponds to the last transformer layer (not the language modelling head), index -2 to the second last layer etc. - hosting (str, optional, default "cloud"): Specifies where the computation will take place. This defaults to "cloud", meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. @@ -285,23 +283,32 @@ def embed( Type of the embedding (e.g. symmetric or asymmetric) """ - named_request = EmbeddingRequest( - prompt=prompt or "", - layers=layers or [], - pooling=pooling or [], - type=type or None, - tokens=tokens or False, + + serializable_prompt = _to_serializable_prompt( + prompt=prompt, at_least_one_token=True ) - body = named_request.render_as_body(model, hosting) + + if tokens is None: + tokens = False + + payload = { + "model": model, + "prompt": serializable_prompt, + "hosting": hosting, + "layers": layers, + "tokens": tokens, + "pooling": pooling, + "type": type, + } response = requests.post( - f"{self.host}embed", headers=self.request_headers, json=body + self.host + "embed", headers=self.request_headers, json=payload ) return self._translate_errors(response) def evaluate( self, model, - completion_expected: str = None, + completion_expected, hosting: str = "cloud", prompt: Union[str, List[Union[str, ImagePrompt]]] = "", ): @@ -324,21 +331,24 @@ def evaluate( The text to be completed. Unconditional completion can be used with an empty string (default). The prompt may contain a zero shot or few shot task. """ - named_request = EvaluationRequest( - prompt=prompt, completion_expected=completion_expected or "" - ) + serializable_prompt = _to_serializable_prompt(prompt=prompt) + + payload = { + "model": model, + "prompt": serializable_prompt, + "hosting": hosting, + "completion_expected": completion_expected, + } response = requests.post( - self.host + "evaluate", - headers=self.request_headers, - json=named_request.render_as_body(model, hosting), + self.host + "evaluate", headers=self.request_headers, json=payload ) return self._translate_errors(response) def qa( self, model: str, - query: Optional[str] = None, - documents: Optional[Sequence[Document]] = None, + query: str, + documents: List[Document], maximum_tokens: int = 64, max_chunk_size: int = 175, disable_optimizations: bool = False, @@ -353,37 +363,69 @@ def qa( model (str, required): Name of model to use. A model name refers to a model architecture (number of parameters among others). Always the latest version of model is used. The model output contains information as to the model version. - hosting (str, optional, default "cloud"): + query (str, required): + The question to be answered about the documents by the model. + + documents (List[Document], required): + A list of documents. This can be either docx documents or text/image prompts. + + maximum_tokens (int, default 64): + The maximum number of tokens to be generated. Completion will terminate after the maximum number of tokens is reached. + + Increase this value to generate longer texts. A text is split into tokens. Usually there are more tokens than words. The summed number of tokens of prompt and maximum_tokens depends on the model (for luminous-base, it may not exceed 2048 tokens). 
+ + max_chunk_size (int, default 175): + Long documents will be split into chunks if they exceed max_chunk_size. + The splitting will be done along the following boundaries until all chunks are shorter than max_chunk_size or all splitting criteria have been exhausted. + The splitting boundaries are, in the given order: + 1. Split first by double newline + (assumed to mark the boundary between 2 paragraphs). + 2. Split paragraphs that are still too long by their median sentence as long as we can still find multiple sentences in the paragraph. + 3. Split each remaining chunk of a paragraph or sentence further along white spaces until each chunk is smaller than max_chunk_size or until no whitespace can be found anymore. + + disable_optimizations (bool, default False) + We continually research optimal ways to work with our models. By default, we apply these optimizations to both your query, documents, and answers for you. + Our goal is to improve your results while using our API. But you can always pass `disable_optimizations: true` and we will leave your query, documents, and answers untouched. + + max_answers (int, default 0): + The upper limit of maximum number of answers. + + min_score (float, default 0.0): + The lower limit of minimum score for every answer. + + hosting (str, default "cloud"): Specifies where the computation will take place. This defaults to "cloud", meaning that it can be executed on any of our servers. An error will be returned if the specified hosting is not available. Check available_models() for available hostings. """ - named_request = QaRequest( - query or "", - documents or [], - maximum_tokens, - max_chunk_size, - disable_optimizations, - max_answers, - min_score, - ) + + payload = { + "model": model, + "query": query, + "documents": [document._to_serializable_document() for document in documents], + "maximum_tokens": maximum_tokens, + "max_answers": max_answers, + "min_score": min_score, + "max_chunk_size": max_chunk_size, + "disable_optimizations": disable_optimizations, + "hosting": hosting, + } response = requests.post( self.host + "qa", headers=self.request_headers, - json=named_request.render_as_body(model, hosting), + json=payload, timeout=None, ) - return self._translate_errors(response) + response_json = self._translate_errors(response) + return response_json - def _explain( - self, model: str, request: ExplanationRequest, hosting: Optional[str] = None - ): + def _explain(self, model: str, request: ExplanationRequest, hosting: Optional[str] = None): body = request.render_as_body(model, hosting) - response = requests.post( - f"{self.host}explain", headers=self.request_headers, json=body - ) - return self._translate_errors(response) + response = requests.post(f"{self.host}explain", headers=self.request_headers, json=body) + response_dict = self._translate_errors(response) + return response_dict + @staticmethod def _translate_errors(response):
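With this final patch reverting the client to its original keyword-argument interface, the series leaves two supported call styles side by side. A brief sketch of both, using the evaluate endpoint; the token and model name are placeholders taken from the README examples:

```python
import os

from aleph_alpha_client import AlephAlphaClient, AlephAlphaModel, EvaluationRequest

client = AlephAlphaClient(host="https://api.aleph-alpha.com", token=os.getenv("AA_TOKEN"))

# Backwards-compatible style: keyword arguments on the client, returning a plain dict.
raw_result = client.evaluate("luminous-extended", prompt="The api works", completion_expected=" well")
print(raw_result["result"])

# Request-object style: a typed request against AlephAlphaModel, returning an EvaluationResponse.
model = AlephAlphaModel(client, model_name="luminous-extended")
typed_result = model.evaluate(EvaluationRequest(prompt="The api works", completion_expected=" well"))
print(typed_result.result)
```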