
Commit bc04b36

feat: Add warning to SingleLabelClassifyEvaluationLogic on missing input label

IL-367
SebastianNiehusTNG committed Apr 3, 2024
1 parent 65f4052 commit bc04b36
Showing 2 changed files with 34 additions and 0 deletions.
src/intelligence_layer/use_cases/classify/classify.py (4 additions, 0 deletions)
@@ -1,3 +1,4 @@
+import warnings
 from collections import defaultdict
 from typing import Iterable, Mapping, NewType, Sequence
@@ -102,6 +103,9 @@ def do_evaluate_single_output(
         sorted_classes = sorted(
             output.scores.items(), key=lambda item: item[1], reverse=True
         )
+        if example.expected_output[0] not in example.input.labels:
+            warn_message = f"[WARNING] Example with ID '{example.id}' has expected label '{example.expected_output}', which is not part of the example's input labels."
+            warnings.warn(warn_message, RuntimeWarning)
         if sorted_classes[0][0] in example.expected_output:
             correct = True
         else:
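For context, the new check can be reproduced in isolation. The following is a minimal sketch; the dataclasses below are illustrative stand-ins, not the real Example and ClassifyInput classes from intelligence_layer:

import warnings
from dataclasses import dataclass
from typing import FrozenSet, Sequence


# Simplified stand-ins for the library types, for illustration only.
@dataclass(frozen=True)
class ClassifyInput:
    chunk: str
    labels: FrozenSet[str]


@dataclass(frozen=True)
class Example:
    id: str
    input: ClassifyInput
    expected_output: Sequence[str]


def warn_on_missing_label(example: Example) -> None:
    # Mirrors the added check: only the first expected label is compared
    # against the input labels, exactly as in the commit.
    if example.expected_output[0] not in example.input.labels:
        warn_message = (
            f"[WARNING] Example with ID '{example.id}' has expected label "
            f"'{example.expected_output}', which is not part of the example's "
            "input labels."
        )
        warnings.warn(warn_message, RuntimeWarning)


example = Example(
    id="example-1",
    input=ClassifyInput(
        chunk="This is good", labels=frozenset({"positive", "negative"})
    ),
    expected_output=["SomethingElse"],
)
warn_on_missing_label(example)  # emits a RuntimeWarning

Note that the check inspects only the first element of expected_output, while the warning message prints the whole sequence.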
tests/use_cases/classify/test_prompt_based_classify.py (30 additions, 0 deletions)
@@ -1,5 +1,6 @@
 from typing import Sequence

+import pytest
 from pytest import fixture

 from intelligence_layer.core import InMemoryTracer, NoOpTracer, TextChunk

@@ -216,6 +217,35 @@ def test_can_evaluate_classify(
         assert evaluation.correct is True


+def test_classify_warns_on_missing_label(
+    in_memory_dataset_repository: InMemoryDatasetRepository,
+    classify_runner: Runner[ClassifyInput, SingleLabelClassifyOutput],
+    in_memory_evaluation_repository: InMemoryEvaluationRepository,
+    classify_evaluator: Evaluator[
+        ClassifyInput,
+        SingleLabelClassifyOutput,
+        Sequence[str],
+        SingleLabelClassifyEvaluation,
+    ],
+    prompt_based_classify: PromptBasedClassify,
+) -> None:
+    example = Example(
+        input=ClassifyInput(
+            chunk=TextChunk("This is good"),
+            labels=frozenset({"positive", "negative"}),
+        ),
+        expected_output=["SomethingElse"],
+    )
+
+    dataset_id = in_memory_dataset_repository.create_dataset(
+        examples=[example], dataset_name="test-dataset"
+    ).id
+
+    run_overview = classify_runner.run_dataset(dataset_id)
+
+    pytest.warns(RuntimeWarning, classify_evaluator.evaluate_runs, run_overview.id)
+
+
 def test_can_aggregate_evaluations(
     classify_evaluator: Evaluator[
         ClassifyInput,
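The test calls pytest.warns in its function-call form. An equivalent sketch using the context-manager form (reusing the variables from the test above; the match argument is checked as a regular expression against the warning message) would be:

# Equivalent assertion with pytest.warns as a context manager.
with pytest.warns(RuntimeWarning, match="not part of the example's input labels"):
    classify_evaluator.evaluate_runs(run_overview.id)

The context-manager form also makes it possible to assert on the warning message, which the positional form does not.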
