From 29bbcfe3cae89dad88b6482dad3ad71f471f1eab Mon Sep 17 00:00:00 2001 From: Merlin Kallenborn Date: Wed, 18 Dec 2024 14:52:25 +0100 Subject: [PATCH] refactor: Make use of already calculated traces from lineages in benchmark execution function --- .../evaluation/benchmark/studio_benchmark.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py index ae5c9e0b..3b93f85c 100644 --- a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py +++ b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py @@ -180,7 +180,7 @@ def average_or_zero(list: list) -> float: benchmark_lineages = self._create_benchmark_lineages( eval_lineages=evaluation_lineages, - trace_ids=trace_ids, + traces=run_traces, ) self.client.submit_benchmark_lineages( @@ -229,21 +229,20 @@ def _create_benchmark_lineages( eval_lineages: list[ EvaluationLineage[Input, ExpectedOutput, Output, Evaluation] ], - trace_ids: list[str], + traces: list[Sequence[ExportedSpan]], ) -> Sequence[BenchmarkLineage[Input, Output, ExpectedOutput, Evaluation]]: return [ - self._create_benchmark_lineage(eval_lineage, trace_id) - for eval_lineage, trace_id in zip(eval_lineages, trace_ids, strict=True) + self._create_benchmark_lineage(eval_lineage, trace) + for eval_lineage, trace in zip(eval_lineages, traces, strict=True) ] def _create_benchmark_lineage( self, eval_lineage: EvaluationLineage[Input, ExpectedOutput, Output, Evaluation], - trace_id: str, + trace: Sequence[ExportedSpan], ) -> BenchmarkLineage: - trace = self._trace_from_lineage(eval_lineage) return BenchmarkLineage( - trace_id=trace_id, + trace_id=str(trace[0].context.trace_id), input=eval_lineage.example.input, expected_output=eval_lineage.example.expected_output, example_metadata=eval_lineage.example.metadata,