From e9d6bfa8b9c0d6ff790b1d57001e88e760f7bb22 Mon Sep 17 00:00:00 2001
From: FelixFehse <felix.fehse@tngtech.com>
Date: Wed, 27 Mar 2024 10:30:00 +0100
Subject: [PATCH] IL-407 Raise ValueError for unknown run ids in run repositories; all tests passing

---
 .../how_to_run_a_task_on_a_dataset.ipynb      |  2 ++
 .../dataset/file_dataset_repository.py        |  7 ++--
 .../evaluation/run/file_run_repository.py     |  6 ++++
 .../run/in_memory_run_repository.py           |  5 +++
 tests/evaluation/test_run_repository.py       | 34 ++++++++++++++-----
 5 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb b/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb
index 8b8bf9aba..887078213 100644
--- a/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb
+++ b/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb
@@ -35,6 +35,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "%%script false --no-raise-error # the following code does not execute as the dataset does not exist\n",
+    "\n",
     "# Step 0\n",
     "dataset_id = \"my-dataset-id\"\n",
     "\n",
diff --git a/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py b/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py
index ea29ab070..833abce87 100644
--- a/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py
+++ b/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py
@@ -78,16 +78,15 @@ def example(
         expected_output_type: type[ExpectedOutput],
     ) -> Optional[Example[Input, ExpectedOutput]]:
         example_path = self._dataset_examples_path(dataset_id)
-        example_path_str = self.path_to_str(self._dataset_examples_path(dataset_id))
-        if not self._file_system.exists(self.path_to_str(example_path.parent)):
+        if not self.exists(example_path.parent):
             raise ValueError(
                 f"Repository does not contain a dataset with id: {dataset_id}"
             )
-        if not self._file_system.exists(example_path_str):
+        if not self.exists(example_path):
             return None
 
         with self._file_system.open(
-            example_path_str, "r", encoding="utf-8"
+            self.path_to_str(example_path), "r", encoding="utf-8"
         ) as examples_file:
             for example in examples_file:
                 # mypy does not accept dynamic types
diff --git a/src/intelligence_layer/evaluation/run/file_run_repository.py b/src/intelligence_layer/evaluation/run/file_run_repository.py
index 0f92b4ebf..d7bf2448c 100644
--- a/src/intelligence_layer/evaluation/run/file_run_repository.py
+++ b/src/intelligence_layer/evaluation/run/file_run_repository.py
@@ -26,6 +26,8 @@ def store_run_overview(self, overview: RunOverview) -> None:
             overview.model_dump_json(indent=2),
             create_parents=True,
         )
+        # create empty folder just in case no examples are ever saved
+        self.mkdir(self._run_directory(overview.id))
 
     def run_overview(self, run_id: str) -> Optional[RunOverview]:
         file_path = self._run_overview_path(run_id)
@@ -49,6 +51,10 @@ def store_example_output(self, example_output: ExampleOutput[Output]) -> None:
     def example_output(
         self, run_id: str, example_id: str, output_type: type[Output]
     ) -> Optional[ExampleOutput[Output]]:
+        path = self._run_output_directory(run_id)
+        if not self.exists(path):
+            raise ValueError(f"Repository does not contain a run with id: {run_id}")
+
         file_path = self._example_output_path(run_id, example_id)
         if not self.exists(file_path):
             return None
diff --git a/src/intelligence_layer/evaluation/run/in_memory_run_repository.py b/src/intelligence_layer/evaluation/run/in_memory_run_repository.py
index 81066b78f..943a7d466 100644
--- a/src/intelligence_layer/evaluation/run/in_memory_run_repository.py
+++ b/src/intelligence_layer/evaluation/run/in_memory_run_repository.py
@@ -23,6 +23,8 @@ def __init__(self) -> None:
 
     def store_run_overview(self, overview: RunOverview) -> None:
         self._run_overviews[overview.id] = overview
+        if overview.id not in self._example_outputs.keys():
+            self._example_outputs[overview.id] = []
 
     def run_overview(self, run_id: str) -> Optional[RunOverview]:
         return self._run_overviews.get(run_id, None)
@@ -38,6 +40,7 @@ def store_example_output(self, example_output: ExampleOutput[Output]) -> None:
     def example_output(
         self, run_id: str, example_id: str, output_type: type[Output]
     ) -> Optional[ExampleOutput[Output]]:
+        # an unknown run id is reported as an error instead of returning None
         if run_id not in self._example_outputs.keys():
-            return None
+            raise ValueError(f"Repository does not contain a run with id: {run_id}")
 
diff --git a/tests/evaluation/test_run_repository.py b/tests/evaluation/test_run_repository.py
index 2316420c3..d9edca43b 100644
--- a/tests/evaluation/test_run_repository.py
+++ b/tests/evaluation/test_run_repository.py
@@ -2,6 +2,7 @@
 from typing import Iterable, Sequence, cast
 from uuid import uuid4
 
+import pytest
 from _pytest.fixtures import FixtureRequest
 from pytest import fixture, mark
 
@@ -69,7 +70,7 @@ def test_run_repository_stores_and_returns_example_output(
     "repository_fixture",
     test_repository_fixtures,
 )
-def test_example_output_returns_none_for_not_existing_ids(
+def test_example_output_returns_none_for_not_existing_example_id(
     repository_fixture: str,
     request: FixtureRequest,
 ) -> None:
@@ -79,15 +80,32 @@ def test_example_output_returns_none_for_not_existing_ids(
     example_output = ExampleOutput(run_id=run_id, example_id=example_id, output=None)
     run_repository.store_example_output(example_output)
 
-    stored_example_outputs = [
-        run_repository.example_output("not-existing-run-id", example_id, type(None)),
-        run_repository.example_output(run_id, "not-existing-example-id", type(None)),
+    assert (
+        run_repository.example_output(run_id, "not-existing-example-id", type(None))
+        is None
+    )
+
+
+@mark.parametrize(
+    "repository_fixture",
+    test_repository_fixtures,
+)
+def test_example_output_raises_for_not_existing_run_id(
+    repository_fixture: str,
+    request: FixtureRequest,
+) -> None:
+    run_repository: RunRepository = request.getfixturevalue(repository_fixture)
+    run_id = "run-id"
+    example_id = "example-id"
+    example_output = ExampleOutput(run_id=run_id, example_id=example_id, output=None)
+    run_repository.store_example_output(example_output)
+
+    with pytest.raises(ValueError):
+        run_repository.example_output("not-existing-run-id", example_id, type(None))
+    with pytest.raises(ValueError):
         run_repository.example_output(
             "not-existing-run-id", "not-existing-example-id", type(None)
-        ),
-    ]
-
-    assert stored_example_outputs == [None, None, None]
+        )
 
 
 @mark.parametrize(