From f1d6cf60dfa68a4d84008b7a4963149c79361b24 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20K=C3=B6hnecke?=
 <niklas.koehnecke@ext.aleph-alpha.com>
Date: Thu, 12 Dec 2024 09:08:09 +0100
Subject: [PATCH] fix: notebooks and env in ci

---
 .env.example                                  |  1 +
 .github/workflows/sdk-tests.yml               |  8 ++---
 docker-compose.yaml                           |  3 +-
 src/documentation/document_index.ipynb        | 10 +++---
 src/documentation/elo_qa_eval.ipynb           | 13 +++-----
 src/documentation/evaluate_with_studio.ipynb  | 18 ++++-------
 .../how_tos/how_to_implement_a_task.ipynb     | 32 ++++++++++---------
 .../studio/how_to_execute_a_benchmark.ipynb   |  2 +-
 src/documentation/qa.ipynb                    |  8 ++---
 .../connectors/limited_concurrency_client.py  |  6 ++--
 10 files changed, 49 insertions(+), 52 deletions(-)

diff --git a/.env.example b/.env.example
index a7f982c22..fb3ccf8dc 100644
--- a/.env.example
+++ b/.env.example
@@ -14,6 +14,7 @@ POSTGRES_PASSWORD=test
 
 # things to adapt
 CLIENT_URL=...
+AUTHORIZATION_SERVICE_URL=...
 AA_TOKEN=token
 DATA_SERVICE_URL=...
 DOCUMENT_INDEX_URL=...
diff --git a/.github/workflows/sdk-tests.yml b/.github/workflows/sdk-tests.yml
index b0358e6eb..7f0a44c04 100644
--- a/.github/workflows/sdk-tests.yml
+++ b/.github/workflows/sdk-tests.yml
@@ -147,9 +147,9 @@ jobs:
           POSTGRES_DB: "il_sdk"
           POSTGRES_USER: "il_sdk"
           POSTGRES_PASSWORD: "test"
-          AUTHORIZATION_SERVICE_URL: "none"
+          AUTHORIZATION_SERVICE_URL: ${{ secrets.AUTHORIZATION_SERVICE_URL }}
           AA_TOKEN: ${{ secrets.AA_TOKEN }}
-          API_SCHEDULER_URL: "https://api.aleph-alpha.com"
+          API_SCHEDULER_URL: ${{ secrets.CLIENT_URL }}
           DATA_SERVICE_URL: ${{secrets.DATA_SERVICE_URL}}
         credentials:
           username: "unused"
@@ -235,9 +235,9 @@ jobs:
           POSTGRES_DB: "il_sdk"
           POSTGRES_USER: "il_sdk"
           POSTGRES_PASSWORD: "test"
-          AUTHORIZATION_SERVICE_URL: "none"
+          AUTHORIZATION_SERVICE_URL: ${{ secrets.AUTHORIZATION_SERVICE_URL }}
           AA_TOKEN: ${{ secrets.AA_TOKEN }}
-          API_SCHEDULER_URL: "https://api.aleph-alpha.com"
+          API_SCHEDULER_URL: ${{ secrets.CLIENT_URL }}
           DATA_SERVICE_URL: ${{secrets.DATA_SERVICE_URL}}
         credentials:
           username: "unused"
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 161089f9a..67ac0f24a 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -71,8 +71,7 @@ services:
     env_file: ".env" # mainly for AA-TOKEN, DB User/PW
     environment:
       POSTGRES_HOST: postgres
-      AUTHORIZATION_SERVICE_URL: "none"
-      API_SCHEDULER_URL: "https://api.aleph-alpha.com"
+      API_SCHEDULER_URL: ${CLIENT_URL}
   postgres:
     image: postgres:15
     ports:
diff --git a/src/documentation/document_index.ipynb b/src/documentation/document_index.ipynb
index 8c4f1b97e..aae97a93f 100644
--- a/src/documentation/document_index.ipynb
+++ b/src/documentation/document_index.ipynb
@@ -25,7 +25,7 @@
     "    LimitedConcurrencyClient,\n",
     "    SemanticEmbed,\n",
     ")\n",
-    "from intelligence_layer.core import InMemoryTracer\n",
+    "from intelligence_layer.core import InMemoryTracer, LuminousControlModel\n",
     "from intelligence_layer.examples import MultipleChunkRetrieverQa, RetrieverBasedQaInput\n",
     "\n",
     "load_dotenv()"
@@ -628,7 +628,9 @@
    "outputs": [],
    "source": [
     "client = LimitedConcurrencyClient.from_env()\n",
-    "retriever_qa = MultipleChunkRetrieverQa(document_index_retriever, insert_chunk_number=3)\n",
+    "retriever_qa = MultipleChunkRetrieverQa(\n",
+    "    document_index_retriever, insert_chunk_number=3, model=LuminousControlModel()\n",
+    ")\n",
     "\n",
     "\n",
     "input = RetrieverBasedQaInput(\n",
@@ -661,7 +663,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "intelligence-layer-LP3DLT23-py3.12",
    "language": "python",
    "name": "python3"
   },
@@ -675,7 +677,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
diff --git a/src/documentation/elo_qa_eval.ipynb b/src/documentation/elo_qa_eval.ipynb
index 920f6e012..ef2b2c55e 100644
--- a/src/documentation/elo_qa_eval.ipynb
+++ b/src/documentation/elo_qa_eval.ipynb
@@ -27,9 +27,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from os import getenv\n",
-    "\n",
-    "from aleph_alpha_client import Client\n",
     "from dotenv import load_dotenv\n",
     "\n",
     "from intelligence_layer.connectors import LimitedConcurrencyClient\n",
@@ -56,8 +53,7 @@
     "\n",
     "load_dotenv()\n",
     "\n",
-    "aa_client = Client(getenv(\"AA_TOKEN\"))\n",
-    "limited_concurrency_client = LimitedConcurrencyClient(aa_client, max_retry_time=60)"
+    "aa_client = limited_concurrency_client = LimitedConcurrencyClient.from_env()"
    ]
   },
   {
@@ -205,7 +201,7 @@
    "source": [
     "models = [\n",
     "    LuminousControlModel(name=\"luminous-base-control\", client=aa_client),\n",
-    "    LuminousControlModel(name=\"luminous-supreme-control\", client=aa_client),\n",
+    "    Llama3InstructModel(name=\"llama-3.1-8b-instruct\", client=aa_client),\n",
     "]\n",
     "\n",
     "for model in models:\n",
@@ -292,6 +288,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# Here the model used as the evaluator is also one of the models being evaluated.\n",
+    "# This introduces a significant bias and is generally not recommended.\n",
     "elo_qa_evaluation_logic = EloQaEvaluationLogic(\n",
     "    model=Llama3InstructModel(name=\"llama-3.1-8b-instruct\")\n",
     ")\n",
@@ -450,8 +448,7 @@
    "outputs": [],
    "source": [
     "newly_added_models = [\n",
-    "    LuminousControlModel(name=\"luminous-base-control-20230501\", client=aa_client),\n",
-    "    LuminousControlModel(name=\"luminous-supreme-control-20230501\", client=aa_client),\n",
+    "    Llama3InstructModel(name=\"llama-3.1-70b-instruct\", client=aa_client),\n",
     "]\n",
     "\n",
     "for model in newly_added_models:\n",
diff --git a/src/documentation/evaluate_with_studio.ipynb b/src/documentation/evaluate_with_studio.ipynb
index 06261c588..2773e5f82 100644
--- a/src/documentation/evaluate_with_studio.ipynb
+++ b/src/documentation/evaluate_with_studio.ipynb
@@ -84,13 +84,6 @@
     "Therefore, let's check out what it looks like."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -126,14 +119,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "all_labels = list(set(item[\"label\"] for item in data))\n",
+    "# We use only a subset of the data here to speed up the evaluation. Remove the slice to run on all example datapoints.\n",
+    "subset_of_data = data[:5]\n",
+    "\n",
+    "all_labels = list(set(item[\"label\"] for item in subset_of_data))\n",
     "dataset = studio_dataset_repository.create_dataset(\n",
     "    examples=[\n",
     "        Example(\n",
     "            input=ClassifyInput(chunk=TextChunk(item[\"message\"]), labels=all_labels),\n",
     "            expected_output=item[\"label\"],\n",
     "        )\n",
-    "        for item in data\n",
+    "        for item in subset_of_data\n",
     "    ],\n",
     "    dataset_name=\"Single Label Classify Dataset\",\n",
     ")\n",
@@ -281,7 +277,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "intelligence-layer-ZqHLMTHE-py3.12",
+   "display_name": "intelligence-layer-LP3DLT23-py3.12",
    "language": "python",
    "name": "python3"
   },
@@ -295,7 +291,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.3"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
diff --git a/src/documentation/how_tos/how_to_implement_a_task.ipynb b/src/documentation/how_tos/how_to_implement_a_task.ipynb
index 54c7228cc..e4dee160a 100644
--- a/src/documentation/how_tos/how_to_implement_a_task.ipynb
+++ b/src/documentation/how_tos/how_to_implement_a_task.ipynb
@@ -11,8 +11,9 @@
     "\n",
     "from intelligence_layer.core import (\n",
     "    CompleteInput,\n",
-    "    LuminousControlModel,\n",
+    "    ControlModel,\n",
     "    NoOpTracer,\n",
+    "    Pharia1ChatModel,\n",
     "    Task,\n",
     "    TaskSpan,\n",
     ")\n",
@@ -62,7 +63,8 @@
     "    joke: str\n",
     "\n",
     "\n",
-    "# Step 1 - we want a control model but do not care otherwise. Therefore we use the default.\n",
+    "# Step 1 - we want a control model but do not otherwise care which one, so we use the default. For our use case, chat models work as well.\n",
+    "model_to_use = Pharia1ChatModel()\n",
     "\n",
     "\n",
     "# Step 2\n",
@@ -70,8 +72,8 @@
     "    PROMPT_TEMPLATE: str = \"\"\"Tell me a joke about the following topic:\"\"\"\n",
     "\n",
     "    # Step 2.1\n",
-    "    def __init__(self, model: LuminousControlModel | None = None) -> None:\n",
-    "        self._model = model if model else LuminousControlModel()\n",
+    "    def __init__(self, model: ControlModel | None = None) -> None:\n",
+    "        self._model = model if model else Pharia1ChatModel()\n",
     "\n",
     "    # Step 2.2\n",
     "    def do_run(\n",
@@ -85,7 +87,9 @@
     "        return TellAJokeTaskOutput(joke=completion.completions[0].completion)\n",
     "\n",
     "\n",
-    "TellAJokeTask().run(TellAJokeTaskInput(topic=\"Software Engineers\"), NoOpTracer())"
+    "TellAJokeTask(model=model_to_use).run(\n",
+    "    TellAJokeTaskInput(topic=\"Software Engineers\"), NoOpTracer()\n",
+    ")"
    ]
   },
   {
@@ -109,6 +113,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from intelligence_layer.core.model import LuminousControlModel\n",
+    "\n",
+    "\n",
     "class PeopleExtractorInput(BaseModel):\n",
     "    text_passage: str\n",
     "\n",
@@ -142,20 +149,15 @@
     "task_input = PeopleExtractorInput(\n",
     "    text_passage=\"Peter ate Sarahs Lunch, their teacher Mr. Meyers was very angry with him.'\"\n",
     ")\n",
-    "PeopleExtractor().run(task_input, NoOpTracer()).answer"
+    "PeopleExtractor(task=SingleChunkQa(model=LuminousControlModel())).run(\n",
+    "    task_input, NoOpTracer()\n",
+    ").answer"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "intelligence-layer-LP3DLT23-py3.12",
    "language": "python",
    "name": "python3"
   },
@@ -169,7 +171,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
diff --git a/src/documentation/how_tos/studio/how_to_execute_a_benchmark.ipynb b/src/documentation/how_tos/studio/how_to_execute_a_benchmark.ipynb
index 3813bdd52..5d96397f0 100644
--- a/src/documentation/how_tos/studio/how_to_execute_a_benchmark.ipynb
+++ b/src/documentation/how_tos/studio/how_to_execute_a_benchmark.ipynb
@@ -91,7 +91,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "intelligence-layer-LP3DLT23-py3.12",
    "language": "python",
    "name": "python3"
   },
diff --git a/src/documentation/qa.ipynb b/src/documentation/qa.ipynb
index a61dcbb30..6e0701be7 100644
--- a/src/documentation/qa.ipynb
+++ b/src/documentation/qa.ipynb
@@ -97,7 +97,7 @@
     "input = SingleChunkQaInput(chunk=text, question=question, generate_highlights=True)\n",
     "\n",
     "# Define a LuminousControlModel and instantiate a SingleChunkQa task\n",
-    "model = LuminousControlModel(name=\"luminous-supreme-control\")\n",
+    "model = LuminousControlModel(name=\"luminous-base-control\")\n",
     "single_chunk_qa = SingleChunkQa(model=model)\n",
     "\n",
     "output = single_chunk_qa.run(input, NoOpTracer())\n",
@@ -369,7 +369,7 @@
     "question = \"What is the name of the book about Robert Moses?\"\n",
     "input = LongContextQaInput(text=long_text, question=question)\n",
     "\n",
-    "long_context_qa = LongContextQa()\n",
+    "long_context_qa = LongContextQa(model=model)\n",
     "tracer = InMemoryTracer()\n",
     "output = long_context_qa.run(input, tracer=tracer)"
    ]
@@ -406,7 +406,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "intelligence-layer-LP3DLT23-py3.12",
    "language": "python",
    "name": "python3"
   },
@@ -420,7 +420,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
diff --git a/src/intelligence_layer/connectors/limited_concurrency_client.py b/src/intelligence_layer/connectors/limited_concurrency_client.py
index 1653d6d5f..9ffa74b5f 100644
--- a/src/intelligence_layer/connectors/limited_concurrency_client.py
+++ b/src/intelligence_layer/connectors/limited_concurrency_client.py
@@ -142,9 +142,9 @@ def from_env(
             assert token, "Define environment variable AA_TOKEN with a valid token for the Aleph Alpha API"
         if host is None:
             host = getenv("CLIENT_URL")
-            if not host:
-                host = "https://api.aleph-alpha.com"
-                print(f"No CLIENT_URL specified in environment, using default: {host}.")
+            assert (
+                host
+            ), "Define CLIENT_URL with a valid url pointing towards your inference API."
 
         return cls(Client(token, host=host))