From e9d6bfa8b9c0d6ff790b1d57001e88e760f7bb22 Mon Sep 17 00:00:00 2001 From: FelixFehse Date: Wed, 27 Mar 2024 10:30:00 +0100 Subject: [PATCH] IL-407 all passing --- .../how_to_run_a_task_on_a_dataset.ipynb | 2 ++ .../dataset/file_dataset_repository.py | 7 ++-- .../evaluation/run/file_run_repository.py | 6 ++++ .../run/in_memory_run_repository.py | 5 +++ tests/evaluation/test_run_repository.py | 34 ++++++++++++++----- 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb b/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb index 8b8bf9aba..887078213 100644 --- a/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb +++ b/src/examples/how-tos/how_to_run_a_task_on_a_dataset.ipynb @@ -35,6 +35,8 @@ "metadata": {}, "outputs": [], "source": [ + "%%script false --no-raise-error # the following code does not execute as the dataset does not exist\n", + "\n", "# Step 0\n", "dataset_id = \"my-dataset-id\"\n", "\n", diff --git a/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py b/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py index ea29ab070..833abce87 100644 --- a/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py +++ b/src/intelligence_layer/evaluation/dataset/file_dataset_repository.py @@ -78,16 +78,15 @@ def example( expected_output_type: type[ExpectedOutput], ) -> Optional[Example[Input, ExpectedOutput]]: example_path = self._dataset_examples_path(dataset_id) - example_path_str = self.path_to_str(self._dataset_examples_path(dataset_id)) - if not self._file_system.exists(self.path_to_str(example_path.parent)): + if not self.exists(example_path.parent): raise ValueError( f"Repository does not contain a dataset with id: {dataset_id}" ) - if not self._file_system.exists(example_path_str): + if not self.exists(example_path): return None with self._file_system.open( - example_path_str, "r", encoding="utf-8" + self.path_to_str(example_path), "r", encoding="utf-8" ) as examples_file: for example in examples_file: # mypy does not accept dynamic types diff --git a/src/intelligence_layer/evaluation/run/file_run_repository.py b/src/intelligence_layer/evaluation/run/file_run_repository.py index 0f92b4ebf..d7bf2448c 100644 --- a/src/intelligence_layer/evaluation/run/file_run_repository.py +++ b/src/intelligence_layer/evaluation/run/file_run_repository.py @@ -26,6 +26,8 @@ def store_run_overview(self, overview: RunOverview) -> None: overview.model_dump_json(indent=2), create_parents=True, ) + # create empty folder just in case no examples are ever saved + self.mkdir(self._run_directory(overview.id)) def run_overview(self, run_id: str) -> Optional[RunOverview]: file_path = self._run_overview_path(run_id) @@ -49,6 +51,10 @@ def store_example_output(self, example_output: ExampleOutput[Output]) -> None: def example_output( self, run_id: str, example_id: str, output_type: type[Output] ) -> Optional[ExampleOutput[Output]]: + path = self._run_output_directory(run_id) + if not self.exists(path): + raise ValueError(f"Repository does not contain a run with id: {run_id}") + file_path = self._example_output_path(run_id, example_id) if not self.exists(file_path): return None diff --git a/src/intelligence_layer/evaluation/run/in_memory_run_repository.py b/src/intelligence_layer/evaluation/run/in_memory_run_repository.py index 81066b78f..943a7d466 100644 --- a/src/intelligence_layer/evaluation/run/in_memory_run_repository.py +++ b/src/intelligence_layer/evaluation/run/in_memory_run_repository.py @@ -23,6 +23,8 @@ def __init__(self) -> None: def store_run_overview(self, overview: RunOverview) -> None: self._run_overviews[overview.id] = overview + if overview.id not in self._example_outputs.keys(): + self._example_outputs[overview.id] = [] def run_overview(self, run_id: str) -> Optional[RunOverview]: return self._run_overviews.get(run_id, None) @@ -38,6 +40,9 @@ def store_example_output(self, example_output: ExampleOutput[Output]) -> None: def example_output( self, run_id: str, example_id: str, output_type: type[Output] ) -> Optional[ExampleOutput[Output]]: + if run_id not in self._example_outputs.keys(): + raise ValueError(f"Repository does not contain a run with id: {run_id}") + if run_id not in self._example_outputs.keys(): return None diff --git a/tests/evaluation/test_run_repository.py b/tests/evaluation/test_run_repository.py index 2316420c3..d9edca43b 100644 --- a/tests/evaluation/test_run_repository.py +++ b/tests/evaluation/test_run_repository.py @@ -2,6 +2,7 @@ from typing import Iterable, Sequence, cast from uuid import uuid4 +import pytest from _pytest.fixtures import FixtureRequest from pytest import fixture, mark @@ -69,7 +70,7 @@ def test_run_repository_stores_and_returns_example_output( "repository_fixture", test_repository_fixtures, ) -def test_example_output_returns_none_for_not_existing_ids( +def test_example_output_returns_none_for_not_existing_example_id( repository_fixture: str, request: FixtureRequest, ) -> None: @@ -79,15 +80,32 @@ def test_example_output_returns_none_for_not_existing_ids( example_output = ExampleOutput(run_id=run_id, example_id=example_id, output=None) run_repository.store_example_output(example_output) - stored_example_outputs = [ - run_repository.example_output("not-existing-run-id", example_id, type(None)), - run_repository.example_output(run_id, "not-existing-example-id", type(None)), + assert ( + run_repository.example_output(run_id, "not-existing-example-id", type(None)) + is None + ) + + +@mark.parametrize( + "repository_fixture", + test_repository_fixtures, +) +def test_example_output_returns_none_for_not_existing_run_id( + repository_fixture: str, + request: FixtureRequest, +) -> None: + run_repository: RunRepository = request.getfixturevalue(repository_fixture) + run_id = "run-id" + example_id = "example-id" + example_output = ExampleOutput(run_id=run_id, example_id=example_id, output=None) + run_repository.store_example_output(example_output) + + with pytest.raises(ValueError): + run_repository.example_output("not-existing-run-id", example_id, type(None)) + with pytest.raises(ValueError): run_repository.example_output( "not-existing-run-id", "not-existing-example-id", type(None) - ), - ] - - assert stored_example_outputs == [None, None, None] + ) @mark.parametrize(