Skip to content

Commit

Permalink
IL-238 fail cases handled in RepositoryNavigator
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixFehseTNG committed Mar 28, 2024
1 parent 26a69cd commit bdcc36f
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class RunLineage(BaseModel, Generic[Input, ExpectedOutput, Output]):
output: ExampleOutput[Output]


class EvalLineage(BaseModel, Generic[Input, ExpectedOutput, Output, Evaluation]):
class EvaluationLineage(BaseModel, Generic[Input, ExpectedOutput, Output, Evaluation]):
example: Example[Input, ExpectedOutput]
outputs: Sequence[ExampleOutput[Output]]
evaluation: ExampleEvaluation[Evaluation]
Expand All @@ -41,7 +41,7 @@ def __init__(
self._run_repository = run_repository
self._eval_repository = evaluation_repository

def run_data(
def run_lineages(
self,
run_id: str,
input_type: type[Input],
Expand All @@ -50,7 +50,7 @@ def run_data(
) -> Iterable[RunLineage[Input, ExpectedOutput, Output]]:
run_overview = self._run_repository.run_overview(run_id)
if run_overview is None:
return []
raise ValueError(f"Run repository does not contain a run with id {run_id}.")

examples = list(
self._dataset_repository.examples(
Expand All @@ -69,38 +69,53 @@ def run_data(
if example.id == example_output.example_id:
yield RunLineage(example=example, output=example_output)

def evaluation_lineages(
    self,
    evaluation_id: str,
    input_type: type[Input],
    expected_output_type: type[ExpectedOutput],
    output_type: type[Output],
    evaluation_type: type[Evaluation],
) -> Iterable[EvaluationLineage[Input, ExpectedOutput, Output, Evaluation]]:
    """Yield one lineage per example evaluation of the given evaluation.

    Every example evaluation is joined with the example it refers to and
    with all outputs produced for that example by the runs listed in the
    evaluation overview. Example evaluations whose example id is not found
    in any of those runs are skipped.

    This is a generator: no repository is queried and no error is raised
    until the result is iterated.

    Args:
        evaluation_id: Id of the evaluation to look up.
        input_type: Forwarded to `run_lineages` for each run.
        expected_output_type: Forwarded to `run_lineages` for each run.
        output_type: Forwarded to `run_lineages` for each run.
        evaluation_type: Forwarded to the evaluation repository when
            fetching the example evaluations.

    Yields:
        An `EvaluationLineage` per matched example evaluation, in the order
        the evaluation repository returned the example evaluations.

    Raises:
        ValueError: If no evaluation repository is set, or if it holds no
            evaluation overview for `evaluation_id`.
    """
    if self._eval_repository is None:
        raise ValueError("Evaluation Repository is not set, but required.")

    eval_overview = self._eval_repository.evaluation_overview(evaluation_id)
    if eval_overview is None:
        raise ValueError(
            f"Evaluation repository does not contain an evaluation with id {evaluation_id}."
        )

    evaluations = list(
        self._eval_repository.example_evaluations(evaluation_id, evaluation_type)
    )

    # Group the run lineages by example id in a single pass, so the join
    # below is a dictionary lookup instead of a rescan of every run lineage
    # for every evaluation. Insertion order keeps the outputs in the order
    # of the run overviews.
    # NOTE(review): assumes example ids are hashable (they are passed around
    # as `str` elsewhere in this class) -- confirm.
    lineages_by_example_id: dict[
        str, list[RunLineage[Input, ExpectedOutput, Output]]
    ] = {}
    for run_lineage in itertools.chain.from_iterable(
        self.run_lineages(
            overview.id, input_type, expected_output_type, output_type
        )
        for overview in eval_overview.run_overviews
    ):
        lineages_by_example_id.setdefault(run_lineage.example.id, []).append(
            run_lineage
        )

    # join
    for evaluation in evaluations:
        matching = lineages_by_example_id.get(evaluation.example_id)
        if not matching:
            continue
        yield EvaluationLineage(
            # the evaluation has only one example
            # and all relevant run lineages contain the same example
            example=matching[0].example,
            outputs=[lineage.output for lineage in matching],
            evaluation=evaluation,
        )

def run_single_example(
def run_lineage(
self,
run_id: str,
example_id: str,
Expand All @@ -111,42 +126,61 @@ def run_single_example(

run_overview = self._run_repository.run_overview(run_id)
if run_overview is None:
return None
raise ValueError(f"Run repository does not contain a run with id {run_id}.")

example = self._dataset_repository.example(
run_overview.dataset_id, example_id, input_type, expected_output_type
)
if example is None:
return None

example_output = self._run_repository.example_output(
run_id, example_id, output_type
)
if example_output is None:
return None

return RunLineage(example=example, output=example_output)

def evaluation_lineage(
    self,
    evaluation_id: str,
    example_id: str,
    input_type: type[Input],
    expected_output_type: type[ExpectedOutput],
    output_type: type[Output],
    evaluation_type: type[Evaluation],
) -> EvaluationLineage[Input, ExpectedOutput, Output, Evaluation] | None:
    """Return the lineage of a single example within one evaluation.

    The example is looked up in every run listed in the evaluation overview
    via `run_lineage`; the outputs of all runs that contain it are collected
    together with the example's evaluation.

    Args:
        evaluation_id: Id of the evaluation to look up.
        example_id: Id of the example whose lineage is requested.
        input_type: Forwarded to `run_lineage` for each run.
        expected_output_type: Forwarded to `run_lineage` for each run.
        output_type: Forwarded to `run_lineage` for each run.
        evaluation_type: Forwarded to the evaluation repository when
            fetching the example evaluation.

    Returns:
        The `EvaluationLineage` for the example, or `None` if no run yields
        a lineage for it or the evaluation repository has no evaluation for
        this example.

    Raises:
        ValueError: If no evaluation repository is set, or if it holds no
            evaluation overview for `evaluation_id`.
    """
    evaluation_repository = self._eval_repository
    if evaluation_repository is None:
        raise ValueError("Evaluation Repository is not set, but required.")

    overview = evaluation_repository.evaluation_overview(evaluation_id)
    if overview is None:
        raise ValueError(
            f"Evaluation repository does not contain an evaluation with id {evaluation_id}."
        )

    # Query every run first; runs without this example contribute nothing.
    found_lineages = []
    for run_overview in overview.run_overviews:
        lineage = self.run_lineage(
            run_overview.id,
            example_id,
            input_type,
            expected_output_type,
            output_type,
        )
        if lineage is not None:
            found_lineages.append(lineage)
    if not found_lineages:
        return None

    evaluation = evaluation_repository.example_evaluation(
        evaluation_id, example_id, evaluation_type
    )
    if evaluation is None:
        return None

    # The example is taken from the first run that contains it.
    collected_outputs = [found.output for found in found_lineages]
    return EvaluationLineage(
        example=found_lineages[0].example,
        outputs=collected_outputs,
        evaluation=evaluation,
    )
65 changes: 56 additions & 9 deletions tests/evaluation/test_repository_navigator.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def test_works_on_run_overviews(
run_overview: RunOverview,
) -> None:
# when
res = list(repository_navigator.run_data(run_overview.id, str, str, str))
res = list(repository_navigator.run_lineages(run_overview.id, str, str, str))

# then
res = sorted(res, key=lambda result: result.example.input)
Expand All @@ -161,7 +161,9 @@ def test_works_on_evaluation(
) -> None:
# when
res = list(
repository_navigator.eval_data(eval_overview.id, str, str, str, DummyEval)
repository_navigator.evaluation_lineages(
eval_overview.id, str, str, str, DummyEval
)
)

# then
Expand All @@ -182,20 +184,23 @@ def test_initialization_gives_warning_if_not_compatible() -> None:

x = RepositoryNavigator(dataset_repository, run_repository)
with pytest.raises(ValueError):
list(x.eval_data("irrelevant", str, str, str, DummyEval))
list(x.evaluation_lineages("irrelevant", str, str, str, DummyEval))
with pytest.raises(ValueError):
x.evaluation_lineage("irrelevant", "irrelevant", str, str, str, DummyEval)


def test_get_run_lineage_for_single_example(
    examples: Sequence[DummyExample],
    repository_navigator: RepositoryNavigator,
    run_overview: RunOverview,
) -> None:
    """The run lineage of the first example pairs its input with its output."""
    # given
    first_example_id = examples[0].id

    # when
    lineage = repository_navigator.run_lineage(
        run_overview.id,
        first_example_id,
        str,
        str,
        str,
    )

    # then
    assert lineage is not None
    assert lineage.example.input == "input0"
    assert lineage.output.output == "input0 -> output"

Expand All @@ -204,13 +209,55 @@ def test_get_eval_lineage_for_single_example(
examples: Sequence[DummyExample],
repository_navigator: RepositoryNavigator,
eval_overview: EvaluationOverview,
):
) -> None:
# when
res = repository_navigator.eval_single_example(
res = repository_navigator.evaluation_lineage(
eval_overview.id, examples[0].id, str, str, str, DummyEval
)

# Then
assert res is not None
assert res.example.input == "input0"
assert res.output.output == "input0 -> output"
assert res.evaluation.result.startswith("input0")
assert res.outputs[0].output == "input0 -> output"
assert len(res.outputs) == 2
eval_result = res.evaluation.result
assert isinstance(eval_result, DummyEval)
assert eval_result.eval.startswith("input0")


def test_get_run_lineage_for_non_existent_example_returns_none(
    repository_navigator: RepositoryNavigator,
    run_overview: RunOverview,
) -> None:
    """An unknown example id yields ``None`` for the fixture's run."""
    # when
    lineage = repository_navigator.run_lineage(
        run_overview.id,
        "non-existent-id",
        str,
        str,
        str,
    )

    # then
    assert lineage is None


def test_get_eval_lineage_for_non_existent_example_returns_none(
    repository_navigator: RepositoryNavigator,
    eval_overview: EvaluationOverview,
) -> None:
    """An unknown example id yields ``None`` for the fixture's evaluation."""
    # when
    lineage = repository_navigator.evaluation_lineage(
        eval_overview.id,
        "non-existent-id",
        str,
        str,
        str,
        DummyEval,
    )

    # then
    assert lineage is None


def test_get_run_lineage_for_non_existent_run_id_returns_none(
    repository_navigator: RepositoryNavigator,
) -> None:
    """An unknown run id makes `run_lineage` raise ``ValueError``.

    Despite the ``returns_none`` suffix in the test name, the expected
    outcome is an exception, not a ``None`` result.
    """
    # `match` pins the failure to the "unknown run id" error, so an
    # unrelated ValueError cannot make this test pass by accident.
    with pytest.raises(
        ValueError, match="does not contain a run with id non-existent-id"
    ):
        repository_navigator.run_lineage(
            "non-existent-id", "irrelevant", str, str, str
        )


def test_get_eval_lineage_for_non_existent_eval_id_returns_none(
    repository_navigator: RepositoryNavigator,
) -> None:
    """An unknown evaluation id makes `evaluation_lineage` raise ``ValueError``.

    Despite the ``returns_none`` suffix in the test name, the expected
    outcome is an exception, not a ``None`` result.
    """
    # `evaluation_lineage` also raises ValueError when no evaluation
    # repository is set; `match` makes sure the "unknown id" error is the
    # one being observed here.
    with pytest.raises(
        ValueError,
        match="does not contain an evaluation with id non-existent-id",
    ):
        repository_navigator.evaluation_lineage(
            "non-existent-id", "irrelevant", str, str, str, DummyEval
        )

0 comments on commit bdcc36f

Please sign in to comment.