ag2ai · davorrunje · Jan 8, 2025 · Jan 7, 2025 · Jan 7, 2025 · Jan 7, 2025
diff --git a/test/agentchat/contrib/agent_eval/test_agent_eval.py b/test/agentchat/contrib/agent_eval/test_agent_eval.py
@@ -10,15 +10,11 @@
 
 import pytest
 
-import autogen
 from autogen.agentchat.contrib.agent_eval.agent_eval import generate_criteria, quantify_criteria
 from autogen.agentchat.contrib.agent_eval.criterion import Criterion
 from autogen.agentchat.contrib.agent_eval.task import Task
 
-from ....conftest import reason, skip_openai  # noqa: E402
-
-KEY_LOC = "notebook"
-OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
+from ....conftest import Credentials, reason, skip_openai  # noqa: E402
 
 
 def remove_ground_truth(test_case: str):
@@ -30,31 +26,8 @@ def remove_ground_truth(test_case: str):
     return str(test_details), correctness
 
 
-if not skip_openai:
-    openai_config_list = autogen.config_list_from_json(
-        OAI_CONFIG_LIST,
-        file_location=KEY_LOC,
-        # The Retrieval tool requires at least gpt-3.5-turbo-1106 (newer versions are supported) or gpt-4-turbo-preview models.
-        # https://platform.openai.com/docs/models/overview
-        filter_dict={
-            "api_type": ["openai"],
-            "model": [
-                "gpt-4o-mini",
-                "gpt-4o",
-                "gpt-4-turbo",
-                "gpt-4-turbo-preview",
-                "gpt-4-0125-preview",
-                "gpt-4-1106-preview",
-            ],
-        },
-    )
-
-    aoai_config_list = autogen.config_list_from_json(
-        OAI_CONFIG_LIST,
-        file_location=KEY_LOC,
-        filter_dict={"api_type": ["azure"]},
-    )
-
+@pytest.fixture
+def task() -> Task:
     success_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_successful.txt").read()
     response_successful = remove_ground_truth(success_str)[0]
     failed_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_failed.txt").read()
@@ -67,14 +40,15 @@ def remove_ground_truth(test_case: str):
             "failed_response": response_failed,
         }
     )
+    return task
 
 
 @pytest.mark.skipif(
     skip_openai,
     reason=reason,
 )
-def test_generate_criteria():
-    criteria = generate_criteria(task=task, llm_config={"config_list": aoai_config_list})
+def test_generate_criteria(credentials_azure: Credentials, task: Task):
+    criteria = generate_criteria(task=task, llm_config={"config_list": credentials_azure.config_list})
     assert criteria
     assert len(criteria) > 0
     assert criteria[0].description
@@ -86,7 +60,7 @@ def test_generate_criteria():
     skip_openai,
     reason=reason,
 )
-def test_quantify_criteria():
+def test_quantify_criteria(credentials_azure: Credentials, task: Task):
     criteria_file = "test/test_files/agenteval-in-out/samples/sample_math_criteria.json"
     criteria = open(criteria_file).read()
     criteria = Criterion.parse_json_str(criteria)
@@ -95,7 +69,7 @@ def test_quantify_criteria():
     test_case, ground_truth = remove_ground_truth(test_case)
 
     quantified = quantify_criteria(
-        llm_config={"config_list": aoai_config_list},
+        llm_config={"config_list": credentials_azure.config_list},
         criteria=criteria,
         task=task,
         test_case=test_case,

diff --git a/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py b/test/agentchat/contrib/capabilities/chat_with_teachable_agent.py
@@ -10,7 +10,7 @@
 from autogen.agentchat.contrib.capabilities.teachability import Teachability
 from autogen.formatting_utils import colored
 
-from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
+from ....conftest import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
 
 # Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
 filter_dict = {"model": ["gpt-4o-mini"]}

diff --git a/test/agentchat/contrib/capabilities/test_teachable_agent.py b/test/agentchat/contrib/capabilities/test_teachable_agent.py
@@ -11,8 +11,7 @@
 from autogen import ConversableAgent, config_list_from_json
 from autogen.formatting_utils import colored
 
-from ....conftest import skip_openai  # noqa: E402
-from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
+from ....conftest import Credentials, skip_openai  # noqa: E402
 
 try:
     from autogen.agentchat.contrib.capabilities.teachability import Teachability
@@ -29,17 +28,13 @@
 filter_dict = {"tags": ["gpt-4o-mini"]}
 
 
-def create_teachable_agent(reset_db=False, verbosity=0):
+def create_teachable_agent(credentials: Credentials, reset_db=False, verbosity=0):
     """Instantiates a teachable agent using the settings from the top of this file."""
-    # Load LLM inference endpoints from an env variable or a file
-    # See https://docs.ag2.ai/docs/FAQ#set-your-api-endpoints
-    # and OAI_CONFIG_LIST_sample
-    config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, filter_dict=filter_dict, file_location=KEY_LOC)
 
     # Start by instantiating any agent that inherits from ConversableAgent.
     teachable_agent = ConversableAgent(
         name="teachable_agent",
-        llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None},  # Disable caching.
+        llm_config={"config_list": credentials.config_list, "timeout": 120, "cache_seed": None},  # Disable caching.
     )
 
     # Instantiate the Teachability capability. Its parameters are all optional.
@@ -67,11 +62,12 @@ def check_agent_response(teachable_agent, user, correct_answer):
         return 0
 
 
-def use_question_answer_phrasing():
+def use_question_answer_phrasing(credentials: Credentials):
     """Tests whether the teachable agent can answer a question after being taught the answer in a previous chat."""
     print(colored("\nTEST QUESTION-ANSWER PHRASING", "light_cyan"))
     num_errors, num_tests = 0, 0
     teachable_agent, teachability = create_teachable_agent(
+        credentials,
         reset_db=True,
         verbosity=0,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
     )  # For a clean test, clear the agent's memory.
@@ -101,11 +97,12 @@ def use_question_answer_phrasing():
     return num_errors, num_tests
 
 
-def use_task_advice_pair_phrasing():
+def use_task_advice_pair_phrasing(credentials: Credentials):
     """Tests whether the teachable agent can demonstrate a new skill after being taught a task-advice pair in a previous chat."""
     print(colored("\nTEST TASK-ADVICE PHRASING", "light_cyan"))
     num_errors, num_tests = 0, 0
     teachable_agent, teachability = create_teachable_agent(
+        credentials,
         reset_db=True,  # For a clean test, clear the teachable agent's memory.
         verbosity=3,  # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
     )
@@ -136,17 +133,17 @@ def use_task_advice_pair_phrasing():
     skip,
     reason="do not run if dependency is not installed or requested to skip",
 )
-def test_teachability_code_paths():
+def test_teachability_code_paths(credentials_gpt_4o_mini: Credentials):
     """Runs this file's unit tests."""
     total_num_errors, total_num_tests = 0, 0
 
     num_trials = 1  # Set to a higher number to get a more accurate error rate.
     for trial in range(num_trials):
-        num_errors, num_tests = use_question_answer_phrasing()
+        num_errors, num_tests = use_question_answer_phrasing(credentials_gpt_4o_mini)
         total_num_errors += num_errors
         total_num_tests += num_tests
 
-        num_errors, num_tests = use_task_advice_pair_phrasing()
+        num_errors, num_tests = use_task_advice_pair_phrasing(credentials_gpt_4o_mini)
         total_num_errors += num_errors
         total_num_tests += num_tests
 
@@ -167,14 +164,14 @@ def test_teachability_code_paths():
     skip,
     reason="do not run if dependency is not installed or requested to skip",
 )
-def test_teachability_accuracy():
+def test_teachability_accuracy(credentials_gpt_4o_mini: Credentials):
     """A very cheap and fast test of teachability accuracy."""
     print(colored("\nTEST TEACHABILITY ACCURACY", "light_cyan"))
 
     num_trials = 10  # The expected probability of failure is about 0.3 on each trial.
     for trial in range(num_trials):
         teachable_agent, teachability = create_teachable_agent(
-            reset_db=True, verbosity=0
+            credentials_gpt_4o_mini, reset_db=True, verbosity=0
         )  # For a clean test, clear the agent's memory.
         user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")
 

diff --git a/test/agentchat/contrib/capabilities/test_transform_messages.py b/test/agentchat/contrib/capabilities/test_transform_messages.py
@@ -12,28 +12,18 @@
 from autogen.agentchat.contrib.capabilities.transform_messages import TransformMessages
 from autogen.agentchat.contrib.capabilities.transforms import MessageHistoryLimiter, MessageTokenLimiter
 
-from ....conftest import skip_openai  # noqa: E402
-from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
+from ....conftest import Credentials, skip_openai  # noqa: E402
 
 
 @pytest.mark.skipif(skip_openai, reason="Requested to skip openai test.")
-def test_transform_messages_capability():
+def test_transform_messages_capability(credentials_gpt_4o_mini: Credentials) -> None:
     """Test the TransformMessages capability to handle long contexts.
 
     This test is a replica of test_transform_chat_history_with_agents in test_context_handling.py
     """
     with tempfile.TemporaryDirectory() as temp_dir:
-        config_list = autogen.config_list_from_json(
-            OAI_CONFIG_LIST,
-            KEY_LOC,
-            filter_dict={
-                "model": "gpt-4o-mini",
-            },
-        )
-
-        assistant = autogen.AssistantAgent(
-            "assistant", llm_config={"config_list": config_list}, max_consecutive_auto_reply=1
-        )
+        llm_config = credentials_gpt_4o_mini.llm_config
+        assistant = autogen.AssistantAgent("assistant", llm_config=llm_config, max_consecutive_auto_reply=1)
 
         context_handling = TransformMessages(
             transforms=[

diff --git a/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py b/test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py
@@ -13,8 +13,7 @@
 
 from autogen import AssistantAgent, config_list_from_json
 
-from ....conftest import skip_openai  # noqa: E402
-from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
+from ....conftest import Credentials, reason, skip_openai  # noqa: E402
 
 try:
     import pgvector
@@ -35,22 +34,16 @@
     skip or skip_openai,
     reason="dependency is not installed OR requested to skip",
 )
-def test_retrievechat():
+def test_retrievechat(credentials_gpt_4o_mini: Credentials):
     conversations = {}
-    # ChatCompletion.start_logging(conversations)  # deprecated in v0.2
-
-    config_list = config_list_from_json(
-        OAI_CONFIG_LIST,
-        file_location=KEY_LOC,
-    )
 
     assistant = AssistantAgent(
         name="assistant",
         system_message="You are a helpful assistant.",
         llm_config={
             "timeout": 600,
             "seed": 42,
-            "config_list": config_list,
+            "config_list": credentials_gpt_4o_mini.config_list,
         },
     )
 
@@ -72,7 +65,7 @@ def test_retrievechat():
             ],
             "custom_text_types": ["non-existent-type"],
             "chunk_token_size": 2000,
-            "model": config_list[0]["model"],
+            "model": credentials_gpt_4o_mini.config_list[0]["model"],
             "vector_db": "pgvector",  # PGVector database
             "collection_name": "test_collection",
             "db_config": {

diff --git a/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py b/test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py
@@ -8,13 +8,13 @@
 
 import os
 import sys
+from typing import Generator
 
 import pytest
 
 from autogen import AssistantAgent, config_list_from_json
 
-from ....conftest import skip_openai  # noqa: E402
-from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
+from ....conftest import Credentials, reason, skip_openai  # noqa: E402
 
 try:
     import fastembed
@@ -37,29 +37,22 @@
 else:
     skip = False or skip_openai
 
-test_dir = os.path.join(os.path.dirname(__file__), "../../..", "test_files")
-
 
 @pytest.mark.skipif(
     sys.platform in ["darwin", "win32"] or not QDRANT_INSTALLED or skip,
     reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
 )
-def test_retrievechat():
+def test_retrievechat(credentials_gpt_4o_mini: Credentials):
     conversations = {}
     # ChatCompletion.start_logging(conversations)  # deprecated in v0.2
 
-    config_list = config_list_from_json(
-        OAI_CONFIG_LIST,
-        file_location=KEY_LOC,
-    )
-
     assistant = AssistantAgent(
         name="assistant",
         system_message="You are a helpful assistant.",
         llm_config={
             "timeout": 600,
             "seed": 42,
-            "config_list": config_list,
+            "config_list": credentials_gpt_4o_mini.config_list,
         },
     )
 
@@ -99,6 +92,7 @@ def test_qdrant_filter():
 
 @pytest.mark.skipif(not QDRANT_INSTALLED, reason="qdrant_client is not installed")
 def test_qdrant_search():
+    test_dir = os.path.join(os.path.dirname(__file__), "../../..", "test_files")
     client = QdrantClient(":memory:")
     create_qdrant_from_dir(test_dir, client=client)
 

diff --git a/test/agentchat/contrib/retrievechat/test_retrievechat.py b/test/agentchat/contrib/retrievechat/test_retrievechat.py
@@ -10,10 +10,7 @@
 
 import pytest
 
-import autogen
-
-from ....conftest import reason, skip_openai  # noqa: E402
-from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402
+from ....conftest import Credentials, reason, skip_openai  # noqa: E402
 
 try:
     import chromadb
@@ -36,22 +33,17 @@
     sys.platform in ["darwin", "win32"] or skip or skip_openai,
     reason=reason,
 )
-def test_retrievechat():
+def test_retrievechat(credentials_gpt_4o_mini: Credentials):
     conversations = {}
     # autogen.ChatCompletion.start_logging(conversations)  # deprecated in v0.2
 
-    config_list = autogen.config_list_from_json(
-        OAI_CONFIG_LIST,
-        file_location=KEY_LOC,
-    )
-
     assistant = AssistantAgent(
         name="assistant",
         system_message="You are a helpful assistant.",
         llm_config={
             "timeout": 600,
             "seed": 42,
-            "config_list": config_list,
+            "config_list": credentials_gpt_4o_mini.config_list,
         },
     )
 
@@ -63,7 +55,7 @@ def test_retrievechat():
         retrieve_config={
             "docs_path": "./website/docs",
             "chunk_token_size": 2000,
-            "model": config_list[0]["model"],
+            "model": credentials_gpt_4o_mini.config_list[0]["model"],
             "client": chromadb.PersistentClient(path="/tmp/chromadb"),
             "embedding_function": sentence_transformer_ef,
             "get_or_create": True,