move prompt from experiment config to method params
japdubengsub committed Nov 8, 2024
1 parent 8032810 commit 075f07f
Showing 5 changed files with 36 additions and 15 deletions.
12 changes: 7 additions & 5 deletions sdks/python/src/opik/api_objects/opik_client.py
@@ -437,23 +437,25 @@ def create_experiment(
         Creates a new experiment using the given dataset name and optional parameters.

         Args:
-            dataset_name (str): The name of the dataset to associate with the experiment.
-            name (Optional[str]): The optional name for the experiment. If None, a generated name will be used.
-            experiment_config (Optional[Dict[str, Any]]): Optional experiment configuration parameters. Must be a dictionary if provided.
+            dataset_name: The name of the dataset to associate with the experiment.
+            name: The optional name for the experiment. If None, a generated name will be used.
+            experiment_config: Optional experiment configuration parameters. Must be a dictionary if provided.
+            prompt: Prompt object to associate with the experiment.

         Returns:
             experiment.Experiment: The newly created experiment object.
         """
         id = helpers.generate_id()
         metadata = None
         prompt_version: Optional[Dict[str, str]] = None
-        prompt: Optional[Prompt] = None

         if isinstance(experiment_config, Mapping):
-            prompt = experiment_config.pop("prompt", None)
             if prompt is not None:
                 prompt_version = {"id": prompt.__internal_api__version_id__}

+                if "prompt" not in experiment_config:
+                    experiment_config["prompt"] = prompt.prompt
+
             metadata = jsonable_encoder.jsonable_encoder(experiment_config)

         elif experiment_config is not None:
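For orientation, a minimal usage sketch of the changed method (not part of the commit): the client construction, names, and template below are hypothetical, and only the handling of the prompt= parameter reflects this change.

    import opik

    client = opik.Opik()  # hypothetical: any configured Opik client

    prompt = opik.Prompt(
        name="demo-prompt",  # hypothetical prompt name
        prompt="Summarize the following text: {{text}}",  # hypothetical template
    )

    # Before this commit the Prompt was passed inside experiment_config["prompt"];
    # now it is a dedicated argument, and create_experiment copies prompt.prompt
    # into the stored metadata under the "prompt" key.
    experiment = client.create_experiment(
        dataset_name="demo-dataset",  # hypothetical dataset name
        name="demo-experiment",
        experiment_config={"model_name": "gpt-3.5"},
        prompt=prompt,
    )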
2 changes: 2 additions & 0 deletions sdks/python/src/opik/evaluation/evaluator.py
@@ -54,6 +54,8 @@ def evaluate(
             threads are created, all tasks executed in the current thread sequentially.
             are executed sequentially in the current thread.
             Use more than 1 worker if your task object is compatible with sharing across threads.
+        prompt: Prompt object to link with experiment.
+
     """
     client = opik_client.get_client_cached()
     start_time = time.time()
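In the same spirit, a hedged sketch of evaluate() with the newly documented keyword; the dataset, task, and metric wiring are placeholders, and only the prompt= argument is what this hunk documents.

    from opik import Opik, Prompt
    from opik.evaluation import evaluate, metrics

    client = Opik()
    dataset = client.get_dataset("demo-dataset")  # assumes this dataset already exists

    prompt = Prompt(name="qa-prompt", prompt="Answer the question: {{question}}")

    def task(item):
        # placeholder task: echo the expected output so the Equals metric has fields to compare
        return {"output": item["expected_output"], "reference": item["expected_output"]}

    evaluate(
        dataset=dataset,
        task=task,
        scoring_metrics=[metrics.Equals()],
        experiment_name="demo-experiment",
        experiment_config={"model_name": "gpt-3.5"},
        prompt=prompt,  # links the Prompt to the created experiment
    )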
4 changes: 2 additions & 2 deletions sdks/python/tests/e2e/test_experiment.py
@@ -54,7 +54,8 @@ def task(item: Dict[str, Any]):
     )

     prompt = Prompt(
-        name=f"test-experiment-prompt-{_random_chars()}", prompt=f"test-experiment-prompt-template-{_random_chars()}"
+        name=f"test-experiment-prompt-{_random_chars()}",
+        prompt=f"test-experiment-prompt-template-{_random_chars()}",
     )

     equals_metric = metrics.Equals()
@@ -65,7 +66,6 @@ def task(item: Dict[str, Any]):
         experiment_name=experiment_name,
         experiment_config={
             "model_name": "gpt-3.5",
-            "prompt": prompt,
         },
         prompt=prompt,
     )
32 changes: 24 additions & 8 deletions sdks/python/tests/e2e/verifiers.py
@@ -1,3 +1,4 @@
+from copy import deepcopy
 from typing import Optional, Dict, Any, List
 import opik
 import json
@@ -208,9 +209,7 @@ def verify_experiment(

     experiment_content = rest_client.experiments.get_experiment_by_id(id)

-    assert (
-        experiment_content.metadata == experiment_metadata
-    ), f"{experiment_content.metadata} != {experiment_metadata}"
+    verify_experiment_metadata(experiment_content, experiment_metadata)

     assert (
         experiment_content.name == experiment_name
@@ -235,22 +234,39 @@ def verify_experiment(
     verify_experiment_prompt(experiment_content, prompt)


+def verify_experiment_metadata(
+    experiment_content: ExperimentPublic,
+    metadata: Dict,
+):
+    experiment_metadata = deepcopy(experiment_content.metadata)
+    if experiment_metadata is None:
+        return
+    experiment_metadata.pop("prompt", None)
+
+    assert experiment_metadata == metadata, f"{experiment_metadata} != {metadata}"
+
+
 def verify_experiment_prompt(
-        experiment_content: ExperimentPublic,
-        prompt: Optional[Prompt],
+    experiment_content: ExperimentPublic,
+    prompt: Optional[Prompt],
 ):
     if prompt is None:
         return

     # asserting Prompt vs Experiment.prompt_version
     assert (
-            experiment_content.prompt_version.id == prompt.__internal_api__version_id__
+        experiment_content.prompt_version.id == prompt.__internal_api__version_id__
     ), f"{experiment_content.prompt_version.id} != {prompt.__internal_api__version_id__}"

     assert (
-            experiment_content.prompt_version.prompt_id == prompt.__internal_api__prompt_id__
+        experiment_content.prompt_version.prompt_id
+        == prompt.__internal_api__prompt_id__
     ), f"{experiment_content.prompt_version.prompt_id} != {prompt.__internal_api__prompt_id__}"

     assert (
-            experiment_content.prompt_version.commit == prompt.commit
+        experiment_content.prompt_version.commit == prompt.commit
     ), f"{experiment_content.prompt_version.commit} != {prompt.commit}"
+
+    # check that experiment config/metadata contains Prompt's template
+    experiment_prompt = experiment_content.metadata["prompt"]
+    assert experiment_prompt == prompt.prompt, f"{experiment_prompt} != {prompt.prompt}"
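The pop("prompt", None) above is needed because create_experiment now injects the prompt template into the stored experiment metadata, so the metadata the backend returns is a superset of the experiment_config the test passed in. A small illustration with hypothetical values:

    config_from_test = {"model_name": "gpt-3.5"}
    template = "test-experiment-prompt-template-abc123"  # hypothetical prompt.prompt value

    # What the backend is expected to hold after create_experiment injects the template:
    stored_metadata = {"model_name": "gpt-3.5", "prompt": template}

    # verify_experiment_metadata drops the injected key before comparing the rest ...
    comparable = dict(stored_metadata)
    comparable.pop("prompt", None)
    assert comparable == config_from_test

    # ... while verify_experiment_prompt checks the injected key against prompt.prompt.
    assert stored_metadata["prompt"] == template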
1 change: 1 addition & 0 deletions sdks/python/tests/unit/evaluation/test_evaluate.py
@@ -85,6 +85,7 @@ def say_task(dataset_item: Dict[str, Any]):
         dataset_name="the-dataset-name",
         name="the-experiment-name",
         experiment_config=None,
+        prompt=None,
     )
     mock_experiment.insert.assert_called_once_with(
         experiment_items=[mock.ANY, mock.ANY]
