Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: make most how-tos runnable #757

Merged
merged 1 commit into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions src/examples/how_tos/example_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Sequence
from typing import Iterable, Sequence

from pydantic import BaseModel

Expand All @@ -16,6 +16,7 @@
RunOverview,
SuccessfulExampleOutput,
)
from intelligence_layer.evaluation.aggregation.aggregator import AggregationLogic


class DummyExample(Example[str, str]):
Expand All @@ -41,6 +42,15 @@ def do_evaluate(
)


class DummyAggregation(BaseModel):
    """Aggregated result produced by `DummyAggregationLogic`.

    Carries a single statistic: how many evaluations were passed to the
    aggregation step.
    """

    # Count of the evaluations that were aggregated.
    num_evaluations: int


class DummyAggregationLogic(AggregationLogic[DummyEvaluation, DummyAggregation]):
    """Minimal aggregation logic for the how-to examples: it only counts evaluations."""

    def aggregate(self, evaluations: Iterable[DummyEvaluation]) -> DummyAggregation:
        """Count the given evaluations and wrap the total in a `DummyAggregation`.

        Args:
            evaluations: The evaluations to aggregate. The iterable is fully
                consumed while counting.

        Returns:
            A `DummyAggregation` whose `num_evaluations` equals the number of
            items yielded by `evaluations`.
        """
        # Tally items one by one; the evaluation contents are never inspected.
        evaluation_count = sum(1 for _ in evaluations)
        return DummyAggregation(num_evaluations=evaluation_count)


class ExampleData:
examples: Sequence[DummyExample]
dataset_repository: InMemoryDatasetRepository
Expand All @@ -51,7 +61,8 @@ class ExampleData:
dataset: Dataset
run_overview_1: RunOverview
run_overview_2: RunOverview
evaluation_overview: EvaluationOverview
evaluation_overview_1: EvaluationOverview
evaluation_overview_2: EvaluationOverview


def example_data() -> ExampleData:
Expand All @@ -78,7 +89,12 @@ def example_data() -> ExampleData:
"my-evaluator",
DummyEvaluationLogic(),
)
evaluation_overview = evaluator.evaluate_runs(run_overview_1.id, run_overview_2.id)
evaluation_overview_1 = evaluator.evaluate_runs(
run_overview_1.id, run_overview_2.id
)
evaluation_overview_2 = evaluator.evaluate_runs(
run_overview_1.id, run_overview_2.id
)

example_data = ExampleData()
example_data.examples = examples
Expand All @@ -90,5 +106,7 @@ def example_data() -> ExampleData:
example_data.dataset = dataset
example_data.run_overview_1 = run_overview_1
example_data.run_overview_2 = run_overview_2
example_data.evaluation_overview = evaluation_overview
example_data.evaluation_overview_1 = evaluation_overview_1
example_data.evaluation_overview_2 = evaluation_overview_2

return example_data
37 changes: 25 additions & 12 deletions src/examples/how_tos/how_to_aggregate_evaluations.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from example_data import DummyAggregationLogic, example_data\n",
"\n",
"from intelligence_layer.evaluation.aggregation.aggregator import Aggregator\n",
"from intelligence_layer.evaluation.aggregation.in_memory_aggregation_repository import (\n",
" InMemoryAggregationRepository,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -20,15 +34,21 @@
"metadata": {},
"outputs": [],
"source": [
"%%script false --no-raise-error # the following code does not execute as the evaluations do not exist\n",
"\n",
"# Step 0\n",
"evaluation_ids = [\"eval_of_interest\", \"other_eval_of_interest\"]\n",
"\n",
"\n",
"my_example_data = example_data()\n",
"print()\n",
"\n",
"evaluation_ids = [\n",
" my_example_data.evaluation_overview_1.id,\n",
" my_example_data.evaluation_overview_2.id,\n",
"]\n",
"\n",
"# Step 1\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"evaluation_repository = my_example_data.evaluation_repository\n",
"aggregation_repository = InMemoryAggregationRepository()\n",
"aggregation_logic = SingleLabelClassifyAggregationLogic()\n",
"aggregation_logic = DummyAggregationLogic()\n",
"\n",
"# Step 2\n",
"aggregator = Aggregator(\n",
Expand All @@ -42,13 +62,6 @@
"# Step 3\n",
"print(aggregation_overview.id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
43 changes: 27 additions & 16 deletions src/examples/how_tos/how_to_evaluate_runs.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from example_data import DummyEvaluationLogic, example_data\n",
"\n",
"from intelligence_layer.evaluation.evaluation.evaluator import Evaluator\n",
"from intelligence_layer.evaluation.evaluation.in_memory_evaluation_repository import (\n",
" InMemoryEvaluationRepository,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -25,34 +39,31 @@
"metadata": {},
"outputs": [],
"source": [
"%%script false --no-raise-error # the following code does not execute as the runs do not exist\n",
"\n",
"# Step 0\n",
"run_ids = [\"run_id_of_interest\", \"other_run_id_of_interest\"]\n",
"my_example_data = example_data()\n",
"print()\n",
"run_ids = [my_example_data.run_overview_1.id, my_example_data.run_overview_2.id]\n",
"\n",
"# Step 1\n",
"dataset_repository = InMemoryDatasetRepository()\n",
"run_repository = InMemoryRunRepository()\n",
"dataset_repository = my_example_data.dataset_repository\n",
"run_repository = my_example_data.run_repository\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"evaluation_logic = SingleLabelClassifyEvaluationLogic()\n",
"evaluation_logic = DummyEvaluationLogic()\n",
"\n",
"# Step 3\n",
"evaluator = Evaluator(dataset_repository, run_repository, evaluation_repository, \"My joke evaluation\", evaluation_logic)\n",
"evaluator = Evaluator(\n",
" dataset_repository,\n",
" run_repository,\n",
" evaluation_repository,\n",
" \"My dummy evaluation\",\n",
" evaluation_logic,\n",
")\n",
"\n",
"evaluation_overview = evaluator.evaluate_runs(*run_ids)\n",
"\n",
"# Step 4\n",
"print(evaluation_overview.id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"```python\n",
"```"
]
}
],
"metadata": {
Expand Down
9 changes: 1 addition & 8 deletions src/examples/how_tos/how_to_retrieve_data_for_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
")\n",
"\n",
"# retrieve all evaluations, and an evaluation for an example\n",
"my_evaluation_id = example_data.evaluation_overview.id\n",
"my_evaluation_id = example_data.evaluation_overview_1.id\n",
"my_evaluations = evaluation_repository.example_evaluations(\n",
" my_evaluation_id, evaluation_type=DummyEvaluation\n",
")\n",
Expand Down Expand Up @@ -117,13 +117,6 @@
"my_lineage = my_evaluator.evaluation_lineage(my_evaluation_id, my_example_id)\n",
"display(my_lineage)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
31 changes: 19 additions & 12 deletions src/examples/how_tos/how_to_run_a_task_on_a_dataset.ipynb
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from example_data import DummyTask, example_data\n",
"\n",
"from intelligence_layer.evaluation.run.in_memory_run_repository import (\n",
" InMemoryRunRepository,\n",
")\n",
"from intelligence_layer.evaluation.run.runner import Runner"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# How to run a task on a dataset\n",
"0. Create a suitable dataset (see [here](./how_to_create_a_dataset.ipynb)) and a task (see [here](./how_to_implement_a_task.ipynb)).\n",
"1. Initialize the task, a `DatasetRepository` and a `RunRepository`\n",
"1. Initialize the task and a `RunRepository`, and open the correct `DatasetRepository`\n",
" - The `DatasetRepository` needs to contain the dataset.\n",
" - The `RunRepository` stores results.\n",
"2. Use the `Runner` to run the task on the given dataset via `run_dataset`\n",
Expand All @@ -21,25 +35,18 @@
"metadata": {},
"outputs": [],
"source": [
"%%script false --no-raise-error # the following code does not execute as the dataset does not exist\n",
"\n",
"# Step 0\n",
"dataset_id = \"my-dataset-id\"\n",
"\n",
"my_example_data = example_data()\n",
"print()\n",
"\n",
"# Step 1\n",
"class DummyTask(Task[None, None]):\n",
" def do_run(self, input: None, task_span: TaskSpan) -> None:\n",
" return None\n",
"\n",
"\n",
"dataset_repository = InMemoryDatasetRepository()\n",
"dataset_repository = my_example_data.dataset_repository\n",
"run_repository = InMemoryRunRepository()\n",
"task = DummyTask()\n",
"\n",
"# Step 2\n",
"runner = Runner(task, dataset_repository, run_repository, \"MyRunDescription\")\n",
"run_overview = runner.run_dataset(dataset_id)\n",
"run_overview = runner.run_dataset(my_example_data.dataset.id)\n",
"\n",
"# Step 3\n",
"print(run_overview.id)"
Expand Down