diff --git a/.github/workflows/docs-test.yml b/.github/workflows/docs-test.yml
index a678e524bc..b87000ebe7 100644
--- a/.github/workflows/docs-test.yml
+++ b/.github/workflows/docs-test.yml
@@ -35,24 +35,9 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: "3.9"
-      # - name: Install packages and dependencies for all tests
-      #   run: |
-      #     uv pip install --system pytest-cov>=5
       - name: Install packages
        run: |
          uv pip install --system -e ".[test,docs]"
-          uv pip list
-      # - name: Install packages and dependencies for Documentation
-      #   run: |
-      #     uv pip install --system pydoc-markdown pyyaml termcolor nbclient
-      #     # Pin databind packages as version 4.5.0 is not compatible with pydoc-markdown.
-      #     uv pip install --system databind.core==4.4.2 databind.json==4.4.2
-      # Force reinstall specific versions to fix typing-extensions import error in CI
-      # - name: Force install specific versions of typing-extensions and pydantic
-      #   run: |
-      #     uv pip uninstall --system -y typing_extensions typing-extensions || true
-      #     uv pip install --system --force-reinstall "typing-extensions==4.7.1"
-      #     uv pip install --system --force-reinstall "pydantic<2.0"
       - name: Run documentation tests
         run: |
           bash scripts/test.sh test/website/test_process_api_reference.py test/website/test_process_notebooks.py -m "not openai"
diff --git a/autogen/exception_utils.py b/autogen/exception_utils.py
index 27f7a94816..55ef0660ca 100644
--- a/autogen/exception_utils.py
+++ b/autogen/exception_utils.py
@@ -52,3 +52,16 @@ class UndefinedNextAgent(Exception):  # noqa: N818
     def __init__(self, message: str = "The provided agents list does not overlap with agents in the group."):
         self.message = message
         super().__init__(self.message)
+
+
+class ModelToolNotSupportedError(Exception):
+    """
+    Exception raised when attempting to use tools with models that do not support them.
+    """
+
+    def __init__(
+        self,
+        model: str,
+    ):
+        self.message = f"Tools are not supported with {model} models. Refer to the documentation at https://platform.openai.com/docs/guides/reasoning#limitations"
+        super().__init__(self.message)
diff --git a/autogen/oai/client.py b/autogen/oai/client.py
index fcc94e766e..c8882947d9 100644
--- a/autogen/oai/client.py
+++ b/autogen/oai/client.py
@@ -10,19 +10,20 @@
 import logging
 import sys
 import uuid
+import warnings
 from typing import Any, Callable, Optional, Protocol, Union
 
 from pydantic import BaseModel, schema_json_of
 
-from autogen.cache import Cache
-from autogen.io.base import IOStream
-from autogen.logger.logger_utils import get_current_ts
-from autogen.oai.client_utils import FormatterProtocol, logging_formatter
-from autogen.oai.openai_utils import OAI_PRICE1K, get_key, is_valid_api_key
-from autogen.runtime_logging import log_chat_completion, log_new_client, log_new_wrapper, logging_enabled
-from autogen.token_count_utils import count_token
-
+from ..cache import Cache
+from ..exception_utils import ModelToolNotSupportedError
+from ..io.base import IOStream
+from ..logger.logger_utils import get_current_ts
 from ..messages.client_messages import StreamMessage, UsageSummaryMessage
+from ..runtime_logging import log_chat_completion, log_new_client, log_new_wrapper, logging_enabled
+from ..token_count_utils import count_token
+from .client_utils import FormatterProtocol, logging_formatter
+from .openai_utils import OAI_PRICE1K, get_key, is_valid_api_key
 
 TOOL_ENABLED = False
 try:
@@ -302,8 +303,11 @@ def _create_or_parse(*args, **kwargs):
         completions = self._oai_client.chat.completions if "messages" in params else self._oai_client.completions  # type: ignore [attr-defined]
         create_or_parse = completions.create
 
+        # TODO: generalize this check once o3 and later reasoning models are released
+        is_o1 = "model" in params and params["model"].startswith("o1")
+
         # If streaming is enabled and has messages, then iterate over the chunks of the response.
-        if params.get("stream", False) and "messages" in params:
+        if params.get("stream", False) and "messages" in params and not is_o1:
             response_contents = [""] * params.get("n", 1)
             finish_reasons = [""] * params.get("n", 1)
             completion_tokens = 0
@@ -410,11 +414,63 @@
         else:
             # If streaming is not enabled, send a regular chat completion request
             params = params.copy()
+            if is_o1:
+                # warn that reasoning models do not support streaming
+                if params.get("stream", False):
+                    warnings.warn(
+                        f"The {params.get('model')} model does not support streaming. Streaming will be disabled."
+                    )
+                if params.get("tools", False):
+                    raise ModelToolNotSupportedError(params.get("model"))
+                self._process_reasoning_model_params(params)
             params["stream"] = False
             response = create_or_parse(**params)
+            # restore the original system messages that _process_reasoning_model_params converted to user messages
+            if is_o1:
+                for msg in params["messages"]:
+                    if msg["role"] == "user" and msg["content"].startswith("System message: "):
+                        msg["role"] = "system"
+                        msg["content"] = msg["content"][len("System message: ") :]
 
         return response
 
+    def _process_reasoning_model_params(self, params: dict[str, Any]) -> None:
+        """Cater for reasoning model (o1, o3, ...) parameters.
+
+        Please refer to: https://platform.openai.com/docs/guides/reasoning#limitations
+        """
+
+        # Unsupported parameters
+        unsupported_params = [
+            "temperature",
+            "frequency_penalty",
+            "presence_penalty",
+            "top_p",
+            "logprobs",
+            "top_logprobs",
+            "logit_bias",
+        ]
+        model_name = params.get("model")
+        for param in unsupported_params:
+            if param in params:
+                warnings.warn(f"`{param}` is not supported with {model_name} model and will be ignored.")
+                params.pop(param)
+        # Replace max_tokens with max_completion_tokens as reasoning tokens are now factored in
+        # and max_tokens isn't valid
+        if "max_tokens" in params:
+            params["max_completion_tokens"] = params.pop("max_tokens")
+
+        # TODO - When o1-mini and o1-preview point to newer models (e.g. 2024-12-...), remove them from this list but leave the 2024-09-12 dated versions
+        system_not_allowed = model_name in ("o1-mini", "o1-preview", "o1-mini-2024-09-12", "o1-preview-2024-09-12")
+
+        if "messages" in params and system_not_allowed:
+            # o1-mini (2024-09-12) and o1-preview (2024-09-12) don't support role='system' messages, only 'user' and 'assistant';
+            # replace the system messages with user messages prepended with "System message: "
+            for msg in params["messages"]:
+                if msg["role"] == "system":
+                    msg["role"] = "user"
+                    msg["content"] = f"System message: {msg['content']}"
+
     def cost(self, response: Union[ChatCompletion, Completion]) -> float:
         """Calculate the cost of the response."""
         model = response.model
diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py
index ed34d6fad9..dd293cfcbb 100644
--- a/autogen/oai/openai_utils.py
+++ b/autogen/oai/openai_utils.py
@@ -31,6 +31,13 @@
 DEFAULT_AZURE_API_VERSION = "2024-02-01"
 OAI_PRICE1K = {
     # https://openai.com/api/pricing/
+    # o1
+    "o1-preview-2024-09-12": (0.015, 0.060),
+    "o1-preview": (0.015, 0.060),
+    "o1-mini-2024-09-12": (0.003, 0.012),
+    "o1-mini": (0.003, 0.012),
+    "o1": (0.015, 0.060),
+    "o1-2024-12-17": (0.015, 0.060),
     # gpt-4o
     "gpt-4o": (0.005, 0.015),
     "gpt-4o-2024-05-13": (0.005, 0.015),
diff --git a/pyproject.toml b/pyproject.toml
index 37cf0afca0..0f14c3923e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,7 +62,7 @@ dependencies = [
     "python-dotenv",
     "tiktoken",
     # Disallowing 2.6.0 can be removed when this is fixed https://github.com/pydantic/pydantic/issues/8705
-    "pydantic>=1.10,<3,!=2.6.0",  # could be both V1 and V2
+    "pydantic>=2.6.1,<3",
     "docker",
     "packaging",
     "websockets>=14,<15",
diff --git a/test/conftest.py b/test/conftest.py
index 6a6938e3bd..b4c66197f2 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -4,6 +4,7 @@
 #
 # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
 # SPDX-License-Identifier: MIT
+import os
 from pathlib import Path
 from typing import Any, Optional
 
@@ -65,14 +66,21 @@ def openai_api_key(self) -> str:
         return self.llm_config["config_list"][0]["api_key"]  # type: ignore[no-any-return]
 
 
-def get_credentials(filter_dict: Optional[dict[str, Any]] = None, temperature: float = 0.0) -> Credentials:
+def get_credentials(
+    filter_dict: Optional[dict[str, Any]] = None, temperature: float = 0.0, fail_if_empty: bool = True
+) -> Credentials:
     """Fixture to load the LLM config."""
-    config_list = autogen.config_list_from_json(
-        OAI_CONFIG_LIST,
-        filter_dict=filter_dict,
-        file_location=KEY_LOC,
-    )
-    assert config_list, "No config list found"
+    try:
+        config_list = autogen.config_list_from_json(
+            OAI_CONFIG_LIST,
+            filter_dict=filter_dict,
+            file_location=KEY_LOC,
+        )
+    except Exception:
+        config_list = []
+
+    if fail_if_empty:
+        assert config_list, "No config list found"
 
     return Credentials(
         llm_config={
@@ -82,12 +90,27 @@ def get_credentials(filter_dict: Optional[dict[str, Any]] = None, temperature: f
     )
 
 
-def get_openai_credentials(filter_dict: Optional[dict[str, Any]] = None, temperature: float = 0.0) -> Credentials:
-    config_list = [
-        conf
-        for conf in get_credentials(filter_dict, temperature).config_list
-        if "api_type" not in conf or conf["api_type"] == "openai"
-    ]
+def get_openai_config_list_from_env(
+    model: str, filter_dict: Optional[dict[str, Any]] = None, temperature: float = 0.0
+) -> list[dict[str, Any]]:
+    if "OPENAI_API_KEY" in os.environ:
+        api_key = os.environ["OPENAI_API_KEY"]
+        return [{"api_key": api_key, "model": model, **(filter_dict or {})}]
+    return []
+
+
+def get_openai_credentials(
+    model: str, filter_dict: Optional[dict[str, Any]] = None, temperature: float = 0.0
+) -> Credentials:
+    config_list = get_credentials(filter_dict, temperature, fail_if_empty=False).config_list
+
+    # Filter out non-OpenAI configs
+    config_list = [conf for conf in config_list if "api_type" not in conf or conf["api_type"] == "openai"]
+
+    # If no OpenAI config was found, try to build one from the environment
+    if config_list == []:
+        config_list = get_openai_config_list_from_env(model, filter_dict, temperature)
 
     assert config_list, "No OpenAI config list found"
 
     return Credentials(
@@ -122,17 +145,29 @@
 
 @pytest.fixture
 def credentials_gpt_4o_mini() -> Credentials:
-    return get_openai_credentials(filter_dict={"tags": ["gpt-4o-mini"]})
+    return get_openai_credentials(model="gpt-4o-mini", filter_dict={"tags": ["gpt-4o-mini"]})
 
 
 @pytest.fixture
 def credentials_gpt_4o() -> Credentials:
-    return get_openai_credentials(filter_dict={"tags": ["gpt-4o"]})
+    return get_openai_credentials(model="gpt-4o", filter_dict={"tags": ["gpt-4o"]})
+
+
+@pytest.fixture
+def credentials_o1_mini() -> Credentials:
+    return get_openai_credentials(model="o1-mini", filter_dict={"tags": ["o1-mini"]})
+
+
+@pytest.fixture
+def credentials_o1() -> Credentials:
+    return get_openai_credentials(model="o1", filter_dict={"tags": ["o1"]})
 
 
 @pytest.fixture
 def credentials_gpt_4o_realtime() -> Credentials:
-    return get_openai_credentials(filter_dict={"tags": ["gpt-4o-realtime"]}, temperature=0.6)
+    return get_openai_credentials(
+        model="gpt-4o-realtime-preview", filter_dict={"tags": ["gpt-4o-realtime"]}, temperature=0.6
+    )
 
 
 @pytest.fixture
diff --git a/test/oai/test_client.py b/test/oai/test_client.py
index 7699236afc..ae969f37ed 100755
--- a/test/oai/test_client.py
+++ b/test/oai/test_client.py
@@ -9,12 +9,13 @@
 import os
 import shutil
 import time
+from collections.abc import Generator
 
 import pytest
 
 from autogen import OpenAIWrapper
 from autogen.cache.cache import Cache
-from autogen.oai.client import LEGACY_CACHE_DIR, LEGACY_DEFAULT_CACHE_SEED
+from autogen.oai.client import LEGACY_CACHE_DIR, LEGACY_DEFAULT_CACHE_SEED, OpenAIClient
 
 from ..conftest import Credentials
@@ -25,7 +26,6 @@
 if openai.__version__ >= "1.1.0":
     TOOL_ENABLED = True
-    from openai.types.chat.chat_completion import ChatCompletionMessage  # noqa: F401
 except ImportError:
     skip = True
 else:
@@ -290,12 +290,149 @@ def test_cache(credentials_gpt_4o_mini: Credentials):
     assert not os.path.exists(os.path.join(cache_dir, str(LEGACY_DEFAULT_CACHE_SEED)))
 
 
+class TestO1:
+    @pytest.fixture
+    def mock_oai_client(self, mock_credentials: Credentials) -> OpenAIClient:
+        config = mock_credentials.config_list[0]
+        api_key = config["api_key"]
+        return OpenAIClient(OpenAI(api_key=api_key), None)
+
+    @pytest.fixture
+    def o1_mini_client(self, credentials_o1_mini: Credentials) -> Generator[OpenAIWrapper, None, None]:
+        config_list = credentials_o1_mini.config_list
+        yield OpenAIWrapper(config_list=config_list, cache_seed=42)
+
+    @pytest.fixture
+    def o1_client(self, credentials_o1: Credentials) -> Generator[OpenAIWrapper, None, None]:
+        config_list = credentials_o1.config_list
+        yield OpenAIWrapper(config_list=config_list, cache_seed=42)
+
+    def test_reasoning_remove_unsupported_params(self, mock_oai_client: OpenAIClient) -> None:
+        """Test that unsupported parameters are removed with appropriate warnings"""
+        test_params = {
+            "model": "o1-mini",
+            "temperature": 0.7,
+            "frequency_penalty": 1.0,
+            "presence_penalty": 0.5,
+            "top_p": 0.9,
+            "logprobs": 5,
+            "top_logprobs": 3,
+            "logit_bias": {1: 2},
+            "valid_param": "keep_me",
+        }
+
+        with pytest.warns(UserWarning) as warning_records:
+            mock_oai_client._process_reasoning_model_params(test_params)
+
+        # Verify all unsupported params were removed
+        assert all(
+            param not in test_params
+            for param in [
+                "temperature",
+                "frequency_penalty",
+                "presence_penalty",
+                "top_p",
+                "logprobs",
+                "top_logprobs",
+                "logit_bias",
+            ]
+        )
+
+        # Verify valid params were kept
+        assert "valid_param" in test_params
+        assert test_params["valid_param"] == "keep_me"
+
+        # Verify appropriate warnings were raised
+        assert len(warning_records) == 7  # One for each unsupported param
+
+    def test_oai_reasoning_max_tokens_replacement(self, mock_oai_client: OpenAIClient) -> None:
+        """Test that max_tokens is replaced with max_completion_tokens"""
+        test_params = {"model": "o1-mini", "max_tokens": 100}
+
+        mock_oai_client._process_reasoning_model_params(test_params)
+
+        assert "max_tokens" not in test_params
+        assert "max_completion_tokens" in test_params
+        assert test_params["max_completion_tokens"] == 100
+
+    @pytest.mark.parametrize(
+        ["model_name", "should_merge"],
+        [
+            ("o1-mini", True),  # TODO: Change to False when o1-mini points to a newer model, e.g. 2024-12-...
+            ("o1-preview", True),  # TODO: Change to False when o1-preview points to a newer model, e.g. 2024-12-...
+            ("o1-mini-2024-09-12", True),
+            ("o1-preview-2024-09-12", True),
+            ("o1", False),
+            ("o1-2024-12-17", False),
+        ],
+    )
+    def test_oai_reasoning_system_message_handling(
+        self, model_name: str, should_merge: bool, mock_oai_client: OpenAIClient
+    ) -> None:
+        """Test system message handling for different model types"""
+        system_msg = "You are an AG2 Agent."
+        user_msg = "Help me with my problem."
+
+        test_params = {
+            "model": model_name,
+            "messages": [{"role": "system", "content": system_msg}, {"role": "user", "content": user_msg}],
+        }
+
+        mock_oai_client._process_reasoning_model_params(test_params)
+
+        assert len(test_params["messages"]) == 2
+        if should_merge:
+            # Check the system message was merged into the user message
+            assert test_params["messages"][0]["content"] == f"System message: {system_msg}"
+            assert test_params["messages"][0]["role"] == "user"
+        else:
+            # Check the messages remained unchanged
+            assert test_params["messages"][0]["content"] == system_msg
+            assert test_params["messages"][0]["role"] == "system"
+
+    def _test_completion(self, client: OpenAIWrapper, messages: list[dict[str, str]]) -> None:
+        assert isinstance(client, OpenAIWrapper)
+        response = client.create(messages=messages, cache_seed=123)
+
+        assert response
+        print(f"{response=}")
+
+        text_or_completion_object = client.extract_text_or_completion_object(response)
+        print(f"{text_or_completion_object=}")
+        assert text_or_completion_object
+        assert isinstance(text_or_completion_object[0], str)
+        assert "4" in text_or_completion_object[0]
+
+    @pytest.mark.parametrize(
+        "messages",
+        [
+            [{"role": "system", "content": "You are an assistant"}, {"role": "user", "content": "2+2="}],
+            [{"role": "user", "content": "2+2="}],
+        ],
+    )
+    @pytest.mark.openai
+    def test_completion_o1_mini(self, o1_mini_client: OpenAIWrapper, messages: list[dict[str, str]]) -> None:
+        self._test_completion(o1_mini_client, messages)
+
+    @pytest.mark.parametrize(
+        "messages",
+        [
+            [{"role": "system", "content": "You are an assistant"}, {"role": "user", "content": "2+2="}],
+            [{"role": "user", "content": "2+2="}],
+        ],
+    )
+    @pytest.mark.openai
+    @pytest.mark.skip(reason="Wait for o1 to be available in CI")
+    def test_completion_o1(self, o1_client: OpenAIWrapper, messages: list[dict[str, str]]) -> None:
+        self._test_completion(o1_client, messages)
+
+
 if __name__ == "__main__":
+    pass
     # test_aoai_chat_completion()
     # test_oai_tool_calling_extraction()
     # test_chat_completion()
-    test_completion()
-    # # test_cost()
+    # test_completion()
+    # test_cost()
     # test_usage_summary()
     # test_legacy_cache()
     # test_cache()
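
For illustration, a minimal sketch of the parameter rewriting this patch introduces, mirroring the tests above. It is hypothetical (not part of the diff): it calls the private `_process_reasoning_model_params` helper directly, the API key is a dummy, and no network request is made.

    from openai import OpenAI

    from autogen.oai.client import OpenAIClient

    # Dummy key: _process_reasoning_model_params only mutates the params dict
    # in place and never calls the API.
    client = OpenAIClient(OpenAI(api_key="sk-dummy"), None)

    params = {
        "model": "o1-mini",
        "temperature": 0.7,  # unsupported on reasoning models; dropped with a UserWarning
        "max_tokens": 100,  # renamed to max_completion_tokens
        "messages": [
            {"role": "system", "content": "You are an assistant"},
            {"role": "user", "content": "2+2="},
        ],
    }
    client._process_reasoning_model_params(params)

    assert "temperature" not in params
    assert params["max_completion_tokens"] == 100
    # o1-mini rejects role="system", so the message is rewritten as a user message.
    assert params["messages"][0]["content"] == "System message: You are an assistant"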