Skip to content

Commit

Permalink
Merge pull request #373 from ag2ai/refactor-testing-credentials
Browse files (browse the repository at this point in the history)
Introduced Credentials into conftest and refactored tests
  • Loading branch information
davorrunje authored Jan 8, 2025
2 parents ae3f1ec + 241571c commit a1e9dc3
Show file tree
Hide file tree
Showing 45 changed files with 808 additions and 1,814 deletions.
42 changes: 8 additions & 34 deletions test/agentchat/contrib/agent_eval/test_agent_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,11 @@

import pytest

import autogen
from autogen.agentchat.contrib.agent_eval.agent_eval import generate_criteria, quantify_criteria
from autogen.agentchat.contrib.agent_eval.criterion import Criterion
from autogen.agentchat.contrib.agent_eval.task import Task

from ....conftest import reason, skip_openai # noqa: E402

KEY_LOC = "notebook"
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
from ....conftest import Credentials, reason, skip_openai # noqa: E402


def remove_ground_truth(test_case: str):
Expand All @@ -30,31 +26,8 @@ def remove_ground_truth(test_case: str):
return str(test_details), correctness


if not skip_openai:
openai_config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
# The Retrieval tool requires at least gpt-3.5-turbo-1106 (newer versions are supported) or gpt-4-turbo-preview models.
# https://platform.openai.com/docs/models/overview
filter_dict={
"api_type": ["openai"],
"model": [
"gpt-4o-mini",
"gpt-4o",
"gpt-4-turbo",
"gpt-4-turbo-preview",
"gpt-4-0125-preview",
"gpt-4-1106-preview",
],
},
)

aoai_config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={"api_type": ["azure"]},
)

@pytest.fixture
def task() -> Task:
success_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_successful.txt").read()
response_successful = remove_ground_truth(success_str)[0]
failed_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_failed.txt").read()
Expand All @@ -67,14 +40,15 @@ def remove_ground_truth(test_case: str):
"failed_response": response_failed,
}
)
return task


@pytest.mark.skipif(
skip_openai,
reason=reason,
)
def test_generate_criteria():
criteria = generate_criteria(task=task, llm_config={"config_list": aoai_config_list})
def test_generate_criteria(credentials_azure: Credentials, task: Task):
criteria = generate_criteria(task=task, llm_config={"config_list": credentials_azure.config_list})
assert criteria
assert len(criteria) > 0
assert criteria[0].description
Expand All @@ -86,7 +60,7 @@ def test_generate_criteria():
skip_openai,
reason=reason,
)
def test_quantify_criteria():
def test_quantify_criteria(credentials_azure: Credentials, task: Task):
criteria_file = "test/test_files/agenteval-in-out/samples/sample_math_criteria.json"
criteria = open(criteria_file).read()
criteria = Criterion.parse_json_str(criteria)
Expand All @@ -95,7 +69,7 @@ def test_quantify_criteria():
test_case, ground_truth = remove_ground_truth(test_case)

quantified = quantify_criteria(
llm_config={"config_list": aoai_config_list},
llm_config={"config_list": credentials_azure.config_list},
criteria=criteria,
task=task,
test_case=test_case,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from autogen.agentchat.contrib.capabilities.teachability import Teachability
from autogen.formatting_utils import colored

from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import KEY_LOC, OAI_CONFIG_LIST # noqa: E402

# Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
filter_dict = {"model": ["gpt-4o-mini"]}
Expand Down
27 changes: 12 additions & 15 deletions test/agentchat/contrib/capabilities/test_teachable_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from autogen import ConversableAgent, config_list_from_json
from autogen.formatting_utils import colored

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, skip_openai # noqa: E402

try:
from autogen.agentchat.contrib.capabilities.teachability import Teachability
Expand All @@ -29,17 +28,13 @@
filter_dict = {"tags": ["gpt-4o-mini"]}


def create_teachable_agent(reset_db=False, verbosity=0):
def create_teachable_agent(credentials: Credentials, reset_db=False, verbosity=0):
"""Instantiates a teachable agent using the settings from the top of this file."""
# Load LLM inference endpoints from an env variable or a file
# See https://docs.ag2.ai/docs/FAQ#set-your-api-endpoints
# and OAI_CONFIG_LIST_sample
config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, filter_dict=filter_dict, file_location=KEY_LOC)

# Start by instantiating any agent that inherits from ConversableAgent.
teachable_agent = ConversableAgent(
name="teachable_agent",
llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None}, # Disable caching.
llm_config={"config_list": credentials.config_list, "timeout": 120, "cache_seed": None}, # Disable caching.
)

# Instantiate the Teachability capability. Its parameters are all optional.
Expand Down Expand Up @@ -67,11 +62,12 @@ def check_agent_response(teachable_agent, user, correct_answer):
return 0


def use_question_answer_phrasing():
def use_question_answer_phrasing(credentials: Credentials):
"""Tests whether the teachable agent can answer a question after being taught the answer in a previous chat."""
print(colored("\nTEST QUESTION-ANSWER PHRASING", "light_cyan"))
num_errors, num_tests = 0, 0
teachable_agent, teachability = create_teachable_agent(
credentials,
reset_db=True,
verbosity=0, # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
) # For a clean test, clear the agent's memory.
Expand Down Expand Up @@ -101,11 +97,12 @@ def use_question_answer_phrasing():
return num_errors, num_tests


def use_task_advice_pair_phrasing():
def use_task_advice_pair_phrasing(credentials: Credentials):
"""Tests whether the teachable agent can demonstrate a new skill after being taught a task-advice pair in a previous chat."""
print(colored("\nTEST TASK-ADVICE PHRASING", "light_cyan"))
num_errors, num_tests = 0, 0
teachable_agent, teachability = create_teachable_agent(
credentials,
reset_db=True, # For a clean test, clear the teachable agent's memory.
verbosity=3, # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
)
Expand Down Expand Up @@ -136,17 +133,17 @@ def use_task_advice_pair_phrasing():
skip,
reason="do not run if dependency is not installed or requested to skip",
)
def test_teachability_code_paths():
def test_teachability_code_paths(credentials_gpt_4o_mini: Credentials):
"""Runs this file's unit tests."""
total_num_errors, total_num_tests = 0, 0

num_trials = 1 # Set to a higher number to get a more accurate error rate.
for trial in range(num_trials):
num_errors, num_tests = use_question_answer_phrasing()
num_errors, num_tests = use_question_answer_phrasing(credentials_gpt_4o_mini)
total_num_errors += num_errors
total_num_tests += num_tests

num_errors, num_tests = use_task_advice_pair_phrasing()
num_errors, num_tests = use_task_advice_pair_phrasing(credentials_gpt_4o_mini)
total_num_errors += num_errors
total_num_tests += num_tests

Expand All @@ -167,14 +164,14 @@ def test_teachability_code_paths():
skip,
reason="do not run if dependency is not installed or requested to skip",
)
def test_teachability_accuracy():
def test_teachability_accuracy(credentials_gpt_4o_mini: Credentials):
"""A very cheap and fast test of teachability accuracy."""
print(colored("\nTEST TEACHABILITY ACCURACY", "light_cyan"))

num_trials = 10 # The expected probability of failure is about 0.3 on each trial.
for trial in range(num_trials):
teachable_agent, teachability = create_teachable_agent(
reset_db=True, verbosity=0
credentials_gpt_4o_mini, reset_db=True, verbosity=0
) # For a clean test, clear the agent's memory.
user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")

Expand Down
18 changes: 4 additions & 14 deletions test/agentchat/contrib/capabilities/test_transform_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,18 @@
from autogen.agentchat.contrib.capabilities.transform_messages import TransformMessages
from autogen.agentchat.contrib.capabilities.transforms import MessageHistoryLimiter, MessageTokenLimiter

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, skip_openai # noqa: E402


@pytest.mark.skipif(skip_openai, reason="Requested to skip openai test.")
def test_transform_messages_capability():
def test_transform_messages_capability(credentials_gpt_4o_mini: Credentials) -> None:
"""Test the TransformMessages capability to handle long contexts.
This test is a replica of test_transform_chat_history_with_agents in test_context_handling.py
"""
with tempfile.TemporaryDirectory() as temp_dir:
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
KEY_LOC,
filter_dict={
"model": "gpt-4o-mini",
},
)

assistant = autogen.AssistantAgent(
"assistant", llm_config={"config_list": config_list}, max_consecutive_auto_reply=1
)
llm_config = credentials_gpt_4o_mini.llm_config
assistant = autogen.AssistantAgent("assistant", llm_config=llm_config, max_consecutive_auto_reply=1)

context_handling = TransformMessages(
transforms=[
Expand Down
15 changes: 4 additions & 11 deletions test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@

from autogen import AssistantAgent, config_list_from_json

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, reason, skip_openai # noqa: E402

try:
import pgvector
Expand All @@ -35,22 +34,16 @@
skip or skip_openai,
reason="dependency is not installed OR requested to skip",
)
def test_retrievechat():
def test_retrievechat(credentials_gpt_4o_mini: Credentials):
conversations = {}
# ChatCompletion.start_logging(conversations) # deprecated in v0.2

config_list = config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
)

assistant = AssistantAgent(
name="assistant",
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"config_list": config_list,
"config_list": credentials_gpt_4o_mini.config_list,
},
)

Expand All @@ -72,7 +65,7 @@ def test_retrievechat():
],
"custom_text_types": ["non-existent-type"],
"chunk_token_size": 2000,
"model": config_list[0]["model"],
"model": credentials_gpt_4o_mini.config_list[0]["model"],
"vector_db": "pgvector", # PGVector database
"collection_name": "test_collection",
"db_config": {
Expand Down
16 changes: 5 additions & 11 deletions test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

import os
import sys
from typing import Generator

import pytest

from autogen import AssistantAgent, config_list_from_json

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, reason, skip_openai # noqa: E402

try:
import fastembed
Expand All @@ -37,29 +37,22 @@
else:
skip = False or skip_openai

test_dir = os.path.join(os.path.dirname(__file__), "../../..", "test_files")


@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or not QDRANT_INSTALLED or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_retrievechat():
def test_retrievechat(credentials_gpt_4o_mini: Credentials):
conversations = {}
# ChatCompletion.start_logging(conversations) # deprecated in v0.2

config_list = config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
)

assistant = AssistantAgent(
name="assistant",
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"config_list": config_list,
"config_list": credentials_gpt_4o_mini.config_list,
},
)

Expand Down Expand Up @@ -99,6 +92,7 @@ def test_qdrant_filter():

@pytest.mark.skipif(not QDRANT_INSTALLED, reason="qdrant_client is not installed")
def test_qdrant_search():
test_dir = os.path.join(os.path.dirname(__file__), "../../..", "test_files")
client = QdrantClient(":memory:")
create_qdrant_from_dir(test_dir, client=client)

Expand Down
16 changes: 4 additions & 12 deletions test/agentchat/contrib/retrievechat/test_retrievechat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@

import pytest

import autogen

from ....conftest import reason, skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, reason, skip_openai # noqa: E402

try:
import chromadb
Expand All @@ -36,22 +33,17 @@
sys.platform in ["darwin", "win32"] or skip or skip_openai,
reason=reason,
)
def test_retrievechat():
def test_retrievechat(credentials_gpt_4o_mini: Credentials):
conversations = {}
# autogen.ChatCompletion.start_logging(conversations) # deprecated in v0.2

config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
)

assistant = AssistantAgent(
name="assistant",
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"config_list": config_list,
"config_list": credentials_gpt_4o_mini.config_list,
},
)

Expand All @@ -63,7 +55,7 @@ def test_retrievechat():
retrieve_config={
"docs_path": "./website/docs",
"chunk_token_size": 2000,
"model": config_list[0]["model"],
"model": credentials_gpt_4o_mini.config_list[0]["model"],
"client": chromadb.PersistentClient(path="/tmp/chromadb"),
"embedding_function": sentence_transformer_ef,
"get_or_create": True,
Expand Down
Loading

0 comments on commit a1e9dc3

Please sign in to comment.