From f32c1469d4bd0b25748416d11f68bddfe13932fe Mon Sep 17 00:00:00 2001 From: Alexander Barannikov <32936723+japdubengsub@users.noreply.github.com> Date: Fri, 8 Nov 2024 13:42:39 +0000 Subject: [PATCH] [OPIK-355] [Prompt library] SDK Prompt implementation - link with experiment (#590) * update openapi spec * add id field to prompt * [OPIK-321] Add prompt id to experiments * rename field to version_id * add prompt field to experiment * link prompt to experiment * fix linter warnings * Rename experiment prompt link dto * update openapi spec * sync API-call params with recent changes in openapi * move prompt from experiment config to method params --------- Co-authored-by: Thiago Hora --- .../code_generation/fern/openapi/openapi.yaml | 111 ++++++++ .../opik/api_objects/experiment/experiment.py | 4 +- .../src/opik/api_objects/opik_client.py | 22 +- .../src/opik/api_objects/prompt/prompt.py | 6 +- sdks/python/src/opik/evaluation/evaluator.py | 5 + sdks/python/src/opik/rest_api/__init__.py | 10 + .../src/opik/rest_api/experiments/client.py | 9 + sdks/python/src/opik/rest_api/spans/client.py | 8 + .../src/opik/rest_api/system_usage/client.py | 237 ++++++++++++++++++ .../python/src/opik/rest_api/traces/client.py | 8 + .../src/opik/rest_api/types/__init__.py | 10 + .../src/opik/rest_api/types/bi_information.py | 44 ++++ .../rest_api/types/bi_information_response.py | 43 ++++ .../src/opik/rest_api/types/experiment.py | 2 + .../opik/rest_api/types/experiment_public.py | 2 + .../rest_api/types/prompt_version_link.py | 44 ++++ .../types/prompt_version_link_public.py | 44 ++++ .../types/prompt_version_link_write.py | 42 ++++ sdks/python/tests/e2e/test_experiment.py | 14 +- sdks/python/tests/e2e/test_prompt.py | 17 +- sdks/python/tests/e2e/verifiers.py | 49 +++- .../tests/unit/evaluation/test_evaluate.py | 1 + 22 files changed, 716 insertions(+), 16 deletions(-) create mode 100644 sdks/python/src/opik/rest_api/types/bi_information.py create mode 100644 sdks/python/src/opik/rest_api/types/bi_information_response.py create mode 100644 sdks/python/src/opik/rest_api/types/prompt_version_link.py create mode 100644 sdks/python/src/opik/rest_api/types/prompt_version_link_public.py create mode 100644 sdks/python/src/opik/rest_api/types/prompt_version_link_write.py diff --git a/sdks/python/code_generation/fern/openapi/openapi.yaml b/sdks/python/code_generation/fern/openapi/openapi.yaml index 2d17712ea..de0d26956 100644 --- a/sdks/python/code_generation/fern/openapi/openapi.yaml +++ b/sdks/python/code_generation/fern/openapi/openapi.yaml @@ -52,6 +52,48 @@ tags: - name: Traces description: Trace related resources paths: + /v1/internal/usage/bi-datasets: + get: + tags: + - System usage + summary: Get datasets information for BI events + description: Get datasets information for BI events per user per workspace + operationId: getDatasetBiInfo + responses: + "200": + description: Datasets BiInformationResponse resource + content: + application/json: + schema: + $ref: '#/components/schemas/BiInformationResponse' + /v1/internal/usage/bi-experiments: + get: + tags: + - System usage + summary: Get experiments information for BI events + description: Get experiments information for BI events per user per workspace + operationId: getExperimentBiInfo + responses: + "200": + description: Experiments BiInformationResponse resource + content: + application/json: + schema: + $ref: '#/components/schemas/BiInformationResponse' + /v1/internal/usage/bi-traces: + get: + tags: + - System usage + summary: Get traces information for BI 
events + description: Get traces information for BI events per user per workspace + operationId: getTracesBiInfo + responses: + "200": + description: Traces BiInformationResponse resource + content: + application/json: + schema: + $ref: '#/components/schemas/BiInformationResponse' /v1/internal/usage/workspace-trace-counts: get: tags: @@ -1187,6 +1229,10 @@ paths: in: query schema: type: string + - name: truncate + in: query + schema: + type: boolean responses: "200": description: Spans resource @@ -1391,6 +1437,10 @@ paths: in: query schema: type: string + - name: truncate + in: query + schema: + type: boolean responses: "200": description: Trace resource @@ -1562,6 +1612,23 @@ paths: application/json: {} components: schemas: + BiInformation: + type: object + properties: + workspace_id: + type: string + user: + type: string + count: + type: integer + format: int64 + BiInformationResponse: + type: object + properties: + bi_information: + type: array + items: + $ref: '#/components/schemas/BiInformation' TraceCountResponse: type: object properties: @@ -2294,6 +2361,8 @@ components: last_updated_by: type: string readOnly: true + prompt_version: + $ref: '#/components/schemas/PromptVersionLink' FeedbackScoreAverage: required: - name @@ -2305,6 +2374,21 @@ components: value: type: number readOnly: true + PromptVersionLink: + required: + - id + type: object + properties: + id: + type: string + format: uuid + commit: + type: string + readOnly: true + prompt_id: + type: string + format: uuid + readOnly: true Experiment_Write: required: - dataset_name @@ -2319,6 +2403,16 @@ components: type: string metadata: $ref: '#/components/schemas/JsonNode_Write' + prompt_version: + $ref: '#/components/schemas/PromptVersionLink_Write' + PromptVersionLink_Write: + required: + - id + type: object + properties: + id: + type: string + format: uuid ExperimentItemsBatch: required: - experiment_items @@ -2414,6 +2508,8 @@ components: last_updated_by: type: string readOnly: true + prompt_version: + $ref: '#/components/schemas/PromptVersionLink_Public' FeedbackScoreAverage_Public: required: - name @@ -2425,6 +2521,21 @@ components: value: type: number readOnly: true + PromptVersionLink_Public: + required: + - id + type: object + properties: + id: + type: string + format: uuid + commit: + type: string + readOnly: true + prompt_id: + type: string + format: uuid + readOnly: true ErrorMessage_Public: type: object properties: diff --git a/sdks/python/src/opik/api_objects/experiment/experiment.py b/sdks/python/src/opik/api_objects/experiment/experiment.py index 0c60834f8..e3bcf4252 100644 --- a/sdks/python/src/opik/api_objects/experiment/experiment.py +++ b/sdks/python/src/opik/api_objects/experiment/experiment.py @@ -5,7 +5,7 @@ from opik.rest_api.types import experiment_item as rest_experiment_item from . import experiment_item from .. import helpers, constants - +from ... 
import Prompt LOGGER = logging.getLogger(__name__) @@ -17,11 +17,13 @@ def __init__( name: Optional[str], dataset_name: str, rest_client: rest_api_client.OpikApi, + prompt: Optional[Prompt] = None, ) -> None: self.id = id self.name = name self.dataset_name = dataset_name self._rest_client = rest_client + self.prompt = prompt def insert(self, experiment_items: List[experiment_item.ExperimentItem]) -> None: rest_experiment_items = [ diff --git a/sdks/python/src/opik/api_objects/opik_client.py b/sdks/python/src/opik/api_objects/opik_client.py index cad296cb7..2c48e6d7c 100644 --- a/sdks/python/src/opik/api_objects/opik_client.py +++ b/sdks/python/src/opik/api_objects/opik_client.py @@ -431,36 +431,45 @@ def create_experiment( dataset_name: str, name: Optional[str] = None, experiment_config: Optional[Dict[str, Any]] = None, + prompt: Optional[Prompt] = None, ) -> experiment.Experiment: """ Creates a new experiment using the given dataset name and optional parameters. Args: - dataset_name (str): The name of the dataset to associate with the experiment. - name (Optional[str]): The optional name for the experiment. If None, a generated name will be used. - experiment_config (Optional[Dict[str, Any]]): Optional experiment configuration parameters. Must be a dictionary if provided. + dataset_name: The name of the dataset to associate with the experiment. + name: The optional name for the experiment. If None, a generated name will be used. + experiment_config: Optional experiment configuration parameters. Must be a dictionary if provided. + prompt: Prompt object to associate with the experiment. Returns: experiment.Experiment: The newly created experiment object. """ id = helpers.generate_id() + metadata = None + prompt_version: Optional[Dict[str, str]] = None if isinstance(experiment_config, Mapping): + if prompt is not None: + prompt_version = {"id": prompt.__internal_api__version_id__} + + if "prompt" not in experiment_config: + experiment_config["prompt"] = prompt.prompt + metadata = jsonable_encoder.jsonable_encoder(experiment_config) + elif experiment_config is not None: LOGGER.error( "Experiment config must be dictionary, but %s was provided. 
Config will not be logged.", experiment_config, ) - metadata = None - else: - metadata = None self._rest_client.experiments.create_experiment( name=name, dataset_name=dataset_name, id=id, metadata=metadata, + prompt_version=prompt_version, ) experiment_ = experiment.Experiment( @@ -468,6 +477,7 @@ def create_experiment( name=name, dataset_name=dataset_name, rest_client=self._rest_client, + prompt=prompt, ) return experiment_ diff --git a/sdks/python/src/opik/api_objects/prompt/prompt.py b/sdks/python/src/opik/api_objects/prompt/prompt.py index 507df4318..6cef49e60 100644 --- a/sdks/python/src/opik/api_objects/prompt/prompt.py +++ b/sdks/python/src/opik/api_objects/prompt/prompt.py @@ -33,6 +33,9 @@ def __init__( self._name = new_instance.name self._prompt = new_instance.prompt self._commit = new_instance.commit + self.__internal_api__version_id__: str = ( + new_instance.__internal_api__version_id__ + ) self.__internal_api__prompt_id__: str = new_instance.__internal_api__prompt_id__ @property @@ -75,7 +78,8 @@ def from_fern_prompt_version( # will not call __init__ to avoid API calls, create new instance with __new__ prompt = cls.__new__(cls) - prompt.__internal_api__prompt_id__ = prompt_version.id + prompt.__internal_api__version_id__ = prompt_version.id + prompt.__internal_api__prompt_id__ = prompt_version.prompt_id prompt._name = name prompt._prompt = prompt_version.template prompt._commit = prompt_version.commit diff --git a/sdks/python/src/opik/evaluation/evaluator.py b/sdks/python/src/opik/evaluation/evaluator.py index 54ab00f08..17b3756a5 100644 --- a/sdks/python/src/opik/evaluation/evaluator.py +++ b/sdks/python/src/opik/evaluation/evaluator.py @@ -3,6 +3,7 @@ from .types import LLMTask from .metrics import base_metric +from .. import Prompt from ..api_objects.dataset import dataset from ..api_objects.experiment import experiment_item from ..api_objects import opik_client @@ -20,6 +21,7 @@ def evaluate( verbose: int = 1, nb_samples: Optional[int] = None, task_threads: int = 16, + prompt: Optional[Prompt] = None, ) -> evaluation_result.EvaluationResult: """ Performs task evaluation on a given dataset. @@ -52,6 +54,8 @@ def evaluate( threads are created, all tasks executed in the current thread sequentially. are executed sequentially in the current thread. Use more than 1 worker if your task object is compatible with sharing across threads. + + prompt: Prompt object to link with experiment. """ client = opik_client.get_client_cached() start_time = time.time() @@ -78,6 +82,7 @@ def evaluate( name=experiment_name, dataset_name=dataset.name, experiment_config=experiment_config, + prompt=prompt, ) report.display_experiment_link(dataset.name, experiment.id) diff --git a/sdks/python/src/opik/rest_api/__init__.py b/sdks/python/src/opik/rest_api/__init__.py index 822486e35..2cf734157 100644 --- a/sdks/python/src/opik/rest_api/__init__.py +++ b/sdks/python/src/opik/rest_api/__init__.py @@ -1,6 +1,8 @@ # This file was auto-generated by Fern from our API Definition. 
from .types import ( + BiInformation, + BiInformationResponse, CategoricalFeedbackDefinition, CategoricalFeedbackDefinitionCreate, CategoricalFeedbackDefinitionPublic, @@ -87,6 +89,9 @@ PromptPublic, PromptVersion, PromptVersionDetail, + PromptVersionLink, + PromptVersionLinkPublic, + PromptVersionLinkWrite, PromptVersionPagePublic, PromptVersionPublic, Span, @@ -128,6 +133,8 @@ __all__ = [ "BadRequestError", + "BiInformation", + "BiInformationResponse", "CategoricalFeedbackDefinition", "CategoricalFeedbackDefinitionCreate", "CategoricalFeedbackDefinitionPublic", @@ -220,6 +227,9 @@ "PromptPublic", "PromptVersion", "PromptVersionDetail", + "PromptVersionLink", + "PromptVersionLinkPublic", + "PromptVersionLinkWrite", "PromptVersionPagePublic", "PromptVersionPublic", "Span", diff --git a/sdks/python/src/opik/rest_api/experiments/client.py b/sdks/python/src/opik/rest_api/experiments/client.py index 1b15d59cb..8a493ea07 100644 --- a/sdks/python/src/opik/rest_api/experiments/client.py +++ b/sdks/python/src/opik/rest_api/experiments/client.py @@ -14,6 +14,7 @@ from ..types.experiment_page_public import ExperimentPagePublic from ..types.experiment_public import ExperimentPublic from ..types.json_node_write import JsonNodeWrite +from ..types.prompt_version_link_write import PromptVersionLinkWrite # this is used as the default value for optional parameters OMIT = typing.cast(typing.Any, ...) @@ -81,6 +82,7 @@ def create_experiment( id: typing.Optional[str] = OMIT, name: typing.Optional[str] = OMIT, metadata: typing.Optional[JsonNodeWrite] = OMIT, + prompt_version: typing.Optional[PromptVersionLinkWrite] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> None: """ @@ -96,6 +98,8 @@ def create_experiment( metadata : typing.Optional[JsonNodeWrite] + prompt_version : typing.Optional[PromptVersionLinkWrite] + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -120,6 +124,7 @@ def create_experiment( "dataset_name": dataset_name, "name": name, "metadata": metadata, + "prompt_version": prompt_version, }, request_options=request_options, omit=OMIT, @@ -490,6 +495,7 @@ async def create_experiment( id: typing.Optional[str] = OMIT, name: typing.Optional[str] = OMIT, metadata: typing.Optional[JsonNodeWrite] = OMIT, + prompt_version: typing.Optional[PromptVersionLinkWrite] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> None: """ @@ -505,6 +511,8 @@ async def create_experiment( metadata : typing.Optional[JsonNodeWrite] + prompt_version : typing.Optional[PromptVersionLinkWrite] + request_options : typing.Optional[RequestOptions] Request-specific configuration. 
@@ -537,6 +545,7 @@ async def main() -> None: "dataset_name": dataset_name, "name": name, "metadata": metadata, + "prompt_version": prompt_version, }, request_options=request_options, omit=OMIT, diff --git a/sdks/python/src/opik/rest_api/spans/client.py b/sdks/python/src/opik/rest_api/spans/client.py index f0ff339bf..bdb2930e1 100644 --- a/sdks/python/src/opik/rest_api/spans/client.py +++ b/sdks/python/src/opik/rest_api/spans/client.py @@ -123,6 +123,7 @@ def get_spans_by_project( trace_id: typing.Optional[str] = None, type: typing.Optional[GetSpansByProjectRequestType] = None, filters: typing.Optional[str] = None, + truncate: typing.Optional[bool] = None, request_options: typing.Optional[RequestOptions] = None, ) -> SpanPagePublic: """ @@ -144,6 +145,8 @@ def get_spans_by_project( filters : typing.Optional[str] + truncate : typing.Optional[bool] + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -170,6 +173,7 @@ def get_spans_by_project( "trace_id": trace_id, "type": type, "filters": filters, + "truncate": truncate, }, request_options=request_options, ) @@ -725,6 +729,7 @@ async def get_spans_by_project( trace_id: typing.Optional[str] = None, type: typing.Optional[GetSpansByProjectRequestType] = None, filters: typing.Optional[str] = None, + truncate: typing.Optional[bool] = None, request_options: typing.Optional[RequestOptions] = None, ) -> SpanPagePublic: """ @@ -746,6 +751,8 @@ async def get_spans_by_project( filters : typing.Optional[str] + truncate : typing.Optional[bool] + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -780,6 +787,7 @@ async def main() -> None: "trace_id": trace_id, "type": type, "filters": filters, + "truncate": truncate, }, request_options=request_options, ) diff --git a/sdks/python/src/opik/rest_api/system_usage/client.py b/sdks/python/src/opik/rest_api/system_usage/client.py index aac45ee0e..1ca28c3f0 100644 --- a/sdks/python/src/opik/rest_api/system_usage/client.py +++ b/sdks/python/src/opik/rest_api/system_usage/client.py @@ -7,6 +7,7 @@ from ..core.client_wrapper import AsyncClientWrapper, SyncClientWrapper from ..core.pydantic_utilities import pydantic_v1 from ..core.request_options import RequestOptions +from ..types.bi_information_response import BiInformationResponse from ..types.trace_count_response import TraceCountResponse @@ -14,6 +15,112 @@ class SystemUsageClient: def __init__(self, *, client_wrapper: SyncClientWrapper): self._client_wrapper = client_wrapper + def get_dataset_bi_info( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> BiInformationResponse: + """ + Get datasets information for BI events per user per workspace + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + BiInformationResponse + Datasets BiInformationResponse resource + + Examples + -------- + from Opik.client import OpikApi + + client = OpikApi() + client.system_usage.get_dataset_bi_info() + """ + _response = self._client_wrapper.httpx_client.request( + "v1/internal/usage/bi-datasets", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(BiInformationResponse, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def get_experiment_bi_info( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> BiInformationResponse: + """ + Get experiments information for BI events per user per workspace + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + BiInformationResponse + Experiments BiInformationResponse resource + + Examples + -------- + from Opik.client import OpikApi + + client = OpikApi() + client.system_usage.get_experiment_bi_info() + """ + _response = self._client_wrapper.httpx_client.request( + "v1/internal/usage/bi-experiments", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(BiInformationResponse, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def get_traces_bi_info( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> BiInformationResponse: + """ + Get traces information for BI events per user per workspace + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + BiInformationResponse + Traces BiInformationResponse resource + + Examples + -------- + from Opik.client import OpikApi + + client = OpikApi() + client.system_usage.get_traces_bi_info() + """ + _response = self._client_wrapper.httpx_client.request( + "v1/internal/usage/bi-traces", method="GET", request_options=request_options + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(BiInformationResponse, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + def get_traces_count_for_workspaces( self, *, request_options: typing.Optional[RequestOptions] = None ) -> TraceCountResponse: @@ -55,6 +162,136 @@ class AsyncSystemUsageClient: def __init__(self, *, client_wrapper: AsyncClientWrapper): self._client_wrapper = client_wrapper + async def get_dataset_bi_info( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> BiInformationResponse: + """ + Get datasets information for BI events per user per workspace + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + BiInformationResponse + Datasets BiInformationResponse resource + + Examples + -------- + import asyncio + + from Opik.client import AsyncOpikApi + + client = AsyncOpikApi() + + + async def main() -> None: + await client.system_usage.get_dataset_bi_info() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/internal/usage/bi-datasets", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(BiInformationResponse, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get_experiment_bi_info( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> BiInformationResponse: + """ + Get experiments information for BI events per user per workspace + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + BiInformationResponse + Experiments BiInformationResponse resource + + Examples + -------- + import asyncio + + from Opik.client import AsyncOpikApi + + client = AsyncOpikApi() + + + async def main() -> None: + await client.system_usage.get_experiment_bi_info() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/internal/usage/bi-experiments", + method="GET", + request_options=request_options, + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(BiInformationResponse, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def get_traces_bi_info( + self, *, request_options: typing.Optional[RequestOptions] = None + ) -> BiInformationResponse: + """ + Get traces information for BI events per user per workspace + + Parameters + ---------- + request_options : typing.Optional[RequestOptions] + Request-specific configuration. 
+ + Returns + ------- + BiInformationResponse + Traces BiInformationResponse resource + + Examples + -------- + import asyncio + + from Opik.client import AsyncOpikApi + + client = AsyncOpikApi() + + + async def main() -> None: + await client.system_usage.get_traces_bi_info() + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + "v1/internal/usage/bi-traces", method="GET", request_options=request_options + ) + try: + if 200 <= _response.status_code < 300: + return pydantic_v1.parse_obj_as(BiInformationResponse, _response.json()) # type: ignore + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + async def get_traces_count_for_workspaces( self, *, request_options: typing.Optional[RequestOptions] = None ) -> TraceCountResponse: diff --git a/sdks/python/src/opik/rest_api/traces/client.py b/sdks/python/src/opik/rest_api/traces/client.py index 20f16b0a3..4e91b51fa 100644 --- a/sdks/python/src/opik/rest_api/traces/client.py +++ b/sdks/python/src/opik/rest_api/traces/client.py @@ -117,6 +117,7 @@ def get_traces_by_project( project_name: typing.Optional[str] = None, project_id: typing.Optional[str] = None, filters: typing.Optional[str] = None, + truncate: typing.Optional[bool] = None, request_options: typing.Optional[RequestOptions] = None, ) -> TracePagePublic: """ @@ -134,6 +135,8 @@ def get_traces_by_project( filters : typing.Optional[str] + truncate : typing.Optional[bool] + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -158,6 +161,7 @@ def get_traces_by_project( "project_name": project_name, "project_id": project_id, "filters": filters, + "truncate": truncate, }, request_options=request_options, ) @@ -710,6 +714,7 @@ async def get_traces_by_project( project_name: typing.Optional[str] = None, project_id: typing.Optional[str] = None, filters: typing.Optional[str] = None, + truncate: typing.Optional[bool] = None, request_options: typing.Optional[RequestOptions] = None, ) -> TracePagePublic: """ @@ -727,6 +732,8 @@ async def get_traces_by_project( filters : typing.Optional[str] + truncate : typing.Optional[bool] + request_options : typing.Optional[RequestOptions] Request-specific configuration. @@ -759,6 +766,7 @@ async def main() -> None: "project_name": project_name, "project_id": project_id, "filters": filters, + "truncate": truncate, }, request_options=request_options, ) diff --git a/sdks/python/src/opik/rest_api/types/__init__.py b/sdks/python/src/opik/rest_api/types/__init__.py index 62be2d12f..04dc13de0 100644 --- a/sdks/python/src/opik/rest_api/types/__init__.py +++ b/sdks/python/src/opik/rest_api/types/__init__.py @@ -1,5 +1,7 @@ # This file was auto-generated by Fern from our API Definition. 
+from .bi_information import BiInformation +from .bi_information_response import BiInformationResponse from .categorical_feedback_definition import CategoricalFeedbackDefinition from .categorical_feedback_definition_create import CategoricalFeedbackDefinitionCreate from .categorical_feedback_definition_public import CategoricalFeedbackDefinitionPublic @@ -92,6 +94,9 @@ from .prompt_public import PromptPublic from .prompt_version import PromptVersion from .prompt_version_detail import PromptVersionDetail +from .prompt_version_link import PromptVersionLink +from .prompt_version_link_public import PromptVersionLinkPublic +from .prompt_version_link_write import PromptVersionLinkWrite from .prompt_version_page_public import PromptVersionPagePublic from .prompt_version_public import PromptVersionPublic from .span import Span @@ -111,6 +116,8 @@ from .workspace_trace_count import WorkspaceTraceCount __all__ = [ + "BiInformation", + "BiInformationResponse", "CategoricalFeedbackDefinition", "CategoricalFeedbackDefinitionCreate", "CategoricalFeedbackDefinitionPublic", @@ -197,6 +204,9 @@ "PromptPublic", "PromptVersion", "PromptVersionDetail", + "PromptVersionLink", + "PromptVersionLinkPublic", + "PromptVersionLinkWrite", "PromptVersionPagePublic", "PromptVersionPublic", "Span", diff --git a/sdks/python/src/opik/rest_api/types/bi_information.py b/sdks/python/src/opik/rest_api/types/bi_information.py new file mode 100644 index 000000000..87c07760f --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/bi_information.py @@ -0,0 +1,44 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 + + +class BiInformation(pydantic_v1.BaseModel): + workspace_id: typing.Optional[str] = None + user: typing.Optional[str] = None + count: typing.Optional[int] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/bi_information_response.py b/sdks/python/src/opik/rest_api/types/bi_information_response.py new file mode 100644 index 000000000..ecbaae802 --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/bi_information_response.py @@ -0,0 +1,43 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .bi_information import BiInformation + + +class BiInformationResponse(pydantic_v1.BaseModel): + bi_information: typing.Optional[typing.List[BiInformation]] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/experiment.py b/sdks/python/src/opik/rest_api/types/experiment.py index 00d14a682..4c7d4377d 100644 --- a/sdks/python/src/opik/rest_api/types/experiment.py +++ b/sdks/python/src/opik/rest_api/types/experiment.py @@ -7,6 +7,7 @@ from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 from .feedback_score_average import FeedbackScoreAverage from .json_node import JsonNode +from .prompt_version_link import PromptVersionLink class Experiment(pydantic_v1.BaseModel): @@ -21,6 +22,7 @@ class Experiment(pydantic_v1.BaseModel): last_updated_at: typing.Optional[dt.datetime] = None created_by: typing.Optional[str] = None last_updated_by: typing.Optional[str] = None + prompt_version: typing.Optional[PromptVersionLink] = None def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/sdks/python/src/opik/rest_api/types/experiment_public.py b/sdks/python/src/opik/rest_api/types/experiment_public.py index 747044e95..fe9f1ea67 100644 --- a/sdks/python/src/opik/rest_api/types/experiment_public.py +++ b/sdks/python/src/opik/rest_api/types/experiment_public.py @@ -7,6 +7,7 @@ from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 from .feedback_score_average_public import FeedbackScoreAveragePublic from .json_node_public import JsonNodePublic +from .prompt_version_link_public import PromptVersionLinkPublic class ExperimentPublic(pydantic_v1.BaseModel): @@ -21,6 +22,7 @@ class ExperimentPublic(pydantic_v1.BaseModel): last_updated_at: typing.Optional[dt.datetime] = None created_by: typing.Optional[str] = None last_updated_by: typing.Optional[str] = None + prompt_version: typing.Optional[PromptVersionLinkPublic] = None def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { diff --git a/sdks/python/src/opik/rest_api/types/prompt_version_link.py b/sdks/python/src/opik/rest_api/types/prompt_version_link.py new file mode 100644 index 000000000..bc4934c19 --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/prompt_version_link.py @@ -0,0 +1,44 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 + + +class PromptVersionLink(pydantic_v1.BaseModel): + id: str + commit: typing.Optional[str] = None + prompt_id: typing.Optional[str] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/prompt_version_link_public.py b/sdks/python/src/opik/rest_api/types/prompt_version_link_public.py new file mode 100644 index 000000000..4fb8518a2 --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/prompt_version_link_public.py @@ -0,0 +1,44 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 + + +class PromptVersionLinkPublic(pydantic_v1.BaseModel): + id: str + commit: typing.Optional[str] = None + prompt_id: typing.Optional[str] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/src/opik/rest_api/types/prompt_version_link_write.py b/sdks/python/src/opik/rest_api/types/prompt_version_link_write.py new file mode 100644 index 000000000..6f535712d --- /dev/null +++ b/sdks/python/src/opik/rest_api/types/prompt_version_link_write.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import datetime as dt +import typing + +from ..core.datetime_utils import serialize_datetime +from ..core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 + + +class PromptVersionLinkWrite(pydantic_v1.BaseModel): + id: str + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/sdks/python/tests/e2e/test_experiment.py b/sdks/python/tests/e2e/test_experiment.py index 5b21fcffb..69d695cea 100644 --- a/sdks/python/tests/e2e/test_experiment.py +++ b/sdks/python/tests/e2e/test_experiment.py @@ -2,10 +2,11 @@ import opik -from opik import synchronization +from opik import Prompt, synchronization from opik.api_objects.dataset import dataset_item from opik.evaluation import metrics from . import verifiers +from .conftest import _random_chars def test_experiment_creation_via_evaluate_function__happyflow( @@ -52,13 +53,21 @@ def task(item: Dict[str, Any]): f"Task received dataset item with an unexpected input: {item['input']}" ) + prompt = Prompt( + name=f"test-experiment-prompt-{_random_chars()}", + prompt=f"test-experiment-prompt-template-{_random_chars()}", + ) + equals_metric = metrics.Equals() evaluation_result = opik.evaluate( dataset=dataset, task=task, scoring_metrics=[equals_metric], experiment_name=experiment_name, - experiment_config={"model_name": "gpt-3.5"}, + experiment_config={ + "model_name": "gpt-3.5", + }, + prompt=prompt, ) opik.flush_tracker() @@ -70,6 +79,7 @@ def task(item: Dict[str, Any]): experiment_metadata={"model_name": "gpt-3.5"}, traces_amount=3, # one trace per dataset item feedback_scores_amount=1, # an average value of all Equals metric scores + prompt=prompt, ) # TODO: check more content of the experiment # diff --git a/sdks/python/tests/e2e/test_prompt.py b/sdks/python/tests/e2e/test_prompt.py index 9ae3ad867..cfa45f1ef 100644 --- a/sdks/python/tests/e2e/test_prompt.py +++ b/sdks/python/tests/e2e/test_prompt.py @@ -16,6 +16,7 @@ def test_prompt__create__happyflow(opik_client): assert prompt.name == prompt_name assert prompt.prompt == prompt_template + assert prompt.__internal_api__version_id__ is not None assert prompt.__internal_api__prompt_id__ is not None assert prompt.commit is not None @@ -43,7 +44,10 @@ def test_prompt__create_new_version__happyflow(opik_client): assert new_prompt.name == prompt.name assert new_prompt.prompt == prompt_template_new - assert new_prompt.__internal_api__prompt_id__ != prompt.__internal_api__prompt_id__ + assert ( + new_prompt.__internal_api__version_id__ != prompt.__internal_api__version_id__ + ) + assert new_prompt.__internal_api__prompt_id__ == prompt.__internal_api__prompt_id__ assert new_prompt.commit != prompt.commit @@ -67,11 +71,14 @@ def test_prompt__do_not_create_new_version_with_the_same_template(opik_client): assert new_prompt.name == prompt.name assert new_prompt.prompt == 
prompt.prompt + assert ( + new_prompt.__internal_api__version_id__ == prompt.__internal_api__version_id__ + ) assert new_prompt.__internal_api__prompt_id__ == prompt.__internal_api__prompt_id__ assert new_prompt.commit == prompt.commit -def test_prompt__get__happyflow(opik_client): +def test_prompt__get_by_name__happyflow(opik_client): unique_identifier = str(uuid.uuid4())[-6:] prompt_name = f"some-prompt-name-{unique_identifier}" @@ -96,6 +103,7 @@ def test_prompt__get__happyflow(opik_client): assert p1.name == new_prompt.name assert p1.prompt == new_prompt.prompt + assert p1.__internal_api__version_id__ == new_prompt.__internal_api__version_id__ assert p1.__internal_api__prompt_id__ == new_prompt.__internal_api__prompt_id__ assert p1.commit == new_prompt.commit @@ -103,6 +111,7 @@ def test_prompt__get__happyflow(opik_client): assert p2.name == prompt.name assert p2.prompt == prompt.prompt + assert p2.__internal_api__version_id__ == prompt.__internal_api__version_id__ assert p2.__internal_api__prompt_id__ == prompt.__internal_api__prompt_id__ assert p2.commit == prompt.commit @@ -126,6 +135,10 @@ def test_prompt__initialize_class_instance(opik_client): assert prompt.name == prompt_from_api.name assert prompt.prompt == prompt_from_api.prompt + assert ( + prompt.__internal_api__version_id__ + == prompt_from_api.__internal_api__version_id__ + ) assert ( prompt.__internal_api__prompt_id__ == prompt_from_api.__internal_api__prompt_id__ diff --git a/sdks/python/tests/e2e/verifiers.py b/sdks/python/tests/e2e/verifiers.py index 10734e1f3..1ca9afbbe 100644 --- a/sdks/python/tests/e2e/verifiers.py +++ b/sdks/python/tests/e2e/verifiers.py @@ -1,10 +1,12 @@ +from copy import deepcopy from typing import Optional, Dict, Any, List import opik import json +from opik.rest_api import ExperimentPublic from opik.types import FeedbackScoreDict from opik.api_objects.dataset import dataset_item -from opik import synchronization +from opik import Prompt, synchronization from .. 
import testlib import mock @@ -191,6 +193,7 @@ def verify_experiment( experiment_metadata: Optional[Dict[str, Any]], feedback_scores_amount: int, traces_amount: int, + prompt: Optional[Prompt] = None, ): rest_client = ( opik_client._rest_client @@ -206,9 +209,7 @@ def verify_experiment( experiment_content = rest_client.experiments.get_experiment_by_id(id) - assert ( - experiment_content.metadata == experiment_metadata - ), f"{experiment_content.metadata} != {experiment_metadata}" + verify_experiment_metadata(experiment_content, experiment_metadata) assert ( experiment_content.name == experiment_name @@ -229,3 +230,43 @@ def verify_experiment( assert ( actual_trace_count == traces_amount ), f"{actual_trace_count} != {traces_amount}" + + verify_experiment_prompt(experiment_content, prompt) + + +def verify_experiment_metadata( + experiment_content: ExperimentPublic, + metadata: Dict, +): + experiment_metadata = deepcopy(experiment_content.metadata) + if experiment_metadata is None: + return + experiment_metadata.pop("prompt", None) + + assert experiment_metadata == metadata, f"{experiment_metadata} != {metadata}" + + +def verify_experiment_prompt( + experiment_content: ExperimentPublic, + prompt: Optional[Prompt], +): + if prompt is None: + return + + # asserting Prompt vs Experiment.prompt_version + assert ( + experiment_content.prompt_version.id == prompt.__internal_api__version_id__ + ), f"{experiment_content.prompt_version.id} != {prompt.__internal_api__version_id__}" + + assert ( + experiment_content.prompt_version.prompt_id + == prompt.__internal_api__prompt_id__ + ), f"{experiment_content.prompt_version.prompt_id} != {prompt.__internal_api__prompt_id__}" + + assert ( + experiment_content.prompt_version.commit == prompt.commit + ), f"{experiment_content.prompt_version.commit} != {prompt.commit}" + + # check that experiment config/metadata contains Prompt's template + experiment_prompt = experiment_content.metadata["prompt"] + assert experiment_prompt == prompt.prompt, f"{experiment_prompt} != {prompt.prompt}" diff --git a/sdks/python/tests/unit/evaluation/test_evaluate.py b/sdks/python/tests/unit/evaluation/test_evaluate.py index 2f36fdc76..9db97e653 100644 --- a/sdks/python/tests/unit/evaluation/test_evaluate.py +++ b/sdks/python/tests/unit/evaluation/test_evaluate.py @@ -85,6 +85,7 @@ def say_task(dataset_item: Dict[str, Any]): dataset_name="the-dataset-name", name="the-experiment-name", experiment_config=None, + prompt=None, ) mock_experiment.insert.assert_called_once_with( experiment_items=[mock.ANY, mock.ANY]
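
Usage sketch (not part of the patch): the snippet below shows how the prompt link introduced in this PR is exercised from the public SDK surface, mirroring the e2e test above. The dataset name, prompt template, task body, and metric wiring are illustrative assumptions; only the Prompt constructor, the new prompt= parameter of opik.evaluate, and the Equals metric come from this diff.

    import opik
    from opik.evaluation import metrics

    client = opik.Opik()
    # Assumes a dataset with this name already exists in the workspace.
    dataset = client.get_dataset(name="demo-dataset")

    # Registering (or reusing) a prompt yields a version id; the experiment
    # created below is linked to it via the new prompt_version field.
    prompt = opik.Prompt(
        name="demo-prompt",
        prompt="Answer the question: {{question}}",
    )

    def task(item):
        # Placeholder task: a real task would render the prompt template and
        # call an LLM; here we echo the expected output so Equals scores 1.0.
        return {"output": item["expected_output"], "reference": item["expected_output"]}

    opik.evaluate(
        dataset=dataset,
        task=task,
        scoring_metrics=[metrics.Equals()],
        experiment_name="demo-experiment",
        experiment_config={"model_name": "gpt-3.5"},
        prompt=prompt,  # new in this PR: links the prompt version to the experiment
    )

As implemented in opik_client.create_experiment above, passing prompt= sends prompt_version={"id": prompt.__internal_api__version_id__} to the REST API and, when the experiment config does not already contain a "prompt" key, copies the prompt template into the experiment metadata.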