
Commit bc04b36

feat: Add warning to SingleLabelClassifyEvaluationLogic on missing input label

IL-367
SebastianNiehusTNG committed Apr 3, 2024
1 parent 65f4052 commit bc04b36
Showing 2 changed files with 34 additions and 0 deletions.
src/intelligence_layer/use_cases/classify/classify.py (4 additions, 0 deletions)
@@ -1,3 +1,4 @@
+import warnings
 from collections import defaultdict
 from typing import Iterable, Mapping, NewType, Sequence
@@ -102,6 +103,9 @@ def do_evaluate_single_output(
         sorted_classes = sorted(
             output.scores.items(), key=lambda item: item[1], reverse=True
         )
+        if example.expected_output[0] not in example.input.labels:
+            warn_message = f"[WARNING] Example with ID '{example.id}' has expected label '{example.expected_output}', which is not part of the example's input labels."
+            warnings.warn(warn_message, RuntimeWarning)
         if sorted_classes[0][0] in example.expected_output:
             correct = True
         else:
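For context, the new check can be reproduced in isolation. The following is a minimal sketch; the dataclasses below are illustrative stand-ins, not the real Example and ClassifyInput classes from intelligence_layer:

import warnings
from dataclasses import dataclass
from typing import FrozenSet, Sequence


# Simplified stand-ins for the library types, for illustration only.
@dataclass(frozen=True)
class ClassifyInput:
    chunk: str
    labels: FrozenSet[str]


@dataclass(frozen=True)
class Example:
    id: str
    input: ClassifyInput
    expected_output: Sequence[str]


def warn_on_missing_label(example: Example) -> None:
    # Mirrors the added check: only the first expected label is compared
    # against the input labels, exactly as in the commit.
    if example.expected_output[0] not in example.input.labels:
        warn_message = (
            f"[WARNING] Example with ID '{example.id}' has expected label "
            f"'{example.expected_output}', which is not part of the example's "
            "input labels."
        )
        warnings.warn(warn_message, RuntimeWarning)


example = Example(
    id="example-1",
    input=ClassifyInput(
        chunk="This is good", labels=frozenset({"positive", "negative"})
    ),
    expected_output=["SomethingElse"],
)
warn_on_missing_label(example)  # emits a RuntimeWarning

Note that the check inspects only the first element of expected_output, while the warning message prints the whole sequence.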
tests/use_cases/classify/test_prompt_based_classify.py (30 additions, 0 deletions)
@@ -1,5 +1,6 @@
 from typing import Sequence

+import pytest
 from pytest import fixture

 from intelligence_layer.core import InMemoryTracer, NoOpTracer, TextChunk

@@ -216,6 +217,35 @@ def test_can_evaluate_classify(
         assert evaluation.correct is True


+def test_classify_warns_on_missing_label(
+    in_memory_dataset_repository: InMemoryDatasetRepository,
+    classify_runner: Runner[ClassifyInput, SingleLabelClassifyOutput],
+    in_memory_evaluation_repository: InMemoryEvaluationRepository,
+    classify_evaluator: Evaluator[
+        ClassifyInput,
+        SingleLabelClassifyOutput,
+        Sequence[str],
+        SingleLabelClassifyEvaluation,
+    ],
+    prompt_based_classify: PromptBasedClassify,
+) -> None:
+    example = Example(
+        input=ClassifyInput(
+            chunk=TextChunk("This is good"),
+            labels=frozenset({"positive", "negative"}),
+        ),
+        expected_output=["SomethingElse"],
+    )
+
+    dataset_id = in_memory_dataset_repository.create_dataset(
+        examples=[example], dataset_name="test-dataset"
+    ).id
+
+    run_overview = classify_runner.run_dataset(dataset_id)
+
+    pytest.warns(RuntimeWarning, classify_evaluator.evaluate_runs, run_overview.id)
+
+
 def test_can_aggregate_evaluations(
     classify_evaluator: Evaluator[
         ClassifyInput,
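The test calls pytest.warns in its function-call form. An equivalent sketch using the context-manager form (reusing the variables from the test above; the match argument is checked as a regular expression against the warning message) would be:

# Equivalent assertion with pytest.warns as a context manager.
with pytest.warns(RuntimeWarning, match="not part of the example's input labels"):
    classify_evaluator.evaluate_runs(run_overview.id)

The context-manager form also makes it possible to assert on the warning message, which the positional form does not.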
