Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduced Credentials into conftest and refactored tests #373

Merged
merged 20 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 8 additions & 34 deletions test/agentchat/contrib/agent_eval/test_agent_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,11 @@

import pytest

import autogen
from autogen.agentchat.contrib.agent_eval.agent_eval import generate_criteria, quantify_criteria
from autogen.agentchat.contrib.agent_eval.criterion import Criterion
from autogen.agentchat.contrib.agent_eval.task import Task

from ....conftest import reason, skip_openai # noqa: E402

KEY_LOC = "notebook"
OAI_CONFIG_LIST = "OAI_CONFIG_LIST"
from ....conftest import Credentials, reason, skip_openai # noqa: E402


def remove_ground_truth(test_case: str):
Expand All @@ -30,31 +26,8 @@ def remove_ground_truth(test_case: str):
return str(test_details), correctness


if not skip_openai:
openai_config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
# The Retrieval tool requires at least gpt-3.5-turbo-1106 (newer versions are supported) or gpt-4-turbo-preview models.
# https://platform.openai.com/docs/models/overview
filter_dict={
"api_type": ["openai"],
"model": [
"gpt-4o-mini",
"gpt-4o",
"gpt-4-turbo",
"gpt-4-turbo-preview",
"gpt-4-0125-preview",
"gpt-4-1106-preview",
],
},
)

aoai_config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
filter_dict={"api_type": ["azure"]},
)

@pytest.fixture
def task() -> Task:
success_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_successful.txt").read()
response_successful = remove_ground_truth(success_str)[0]
failed_str = open("test/test_files/agenteval-in-out/samples/sample_math_response_failed.txt").read()
Expand All @@ -67,14 +40,15 @@ def remove_ground_truth(test_case: str):
"failed_response": response_failed,
}
)
return task


@pytest.mark.skipif(
skip_openai,
reason=reason,
)
def test_generate_criteria():
criteria = generate_criteria(task=task, llm_config={"config_list": aoai_config_list})
def test_generate_criteria(credentials_azure: Credentials, task: Task):
criteria = generate_criteria(task=task, llm_config={"config_list": credentials_azure.config_list})
assert criteria
assert len(criteria) > 0
assert criteria[0].description
Expand All @@ -86,7 +60,7 @@ def test_generate_criteria():
skip_openai,
reason=reason,
)
def test_quantify_criteria():
def test_quantify_criteria(credentials_azure: Credentials, task: Task):
criteria_file = "test/test_files/agenteval-in-out/samples/sample_math_criteria.json"
criteria = open(criteria_file).read()
criteria = Criterion.parse_json_str(criteria)
Expand All @@ -95,7 +69,7 @@ def test_quantify_criteria():
test_case, ground_truth = remove_ground_truth(test_case)

quantified = quantify_criteria(
llm_config={"config_list": aoai_config_list},
llm_config={"config_list": credentials_azure.config_list},
criteria=criteria,
task=task,
test_case=test_case,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from autogen.agentchat.contrib.capabilities.teachability import Teachability
from autogen.formatting_utils import colored

from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import KEY_LOC, OAI_CONFIG_LIST # noqa: E402

# Specify the model to use. GPT-3.5 is less reliable than GPT-4 at learning from user input.
filter_dict = {"model": ["gpt-4o-mini"]}
Expand Down
27 changes: 12 additions & 15 deletions test/agentchat/contrib/capabilities/test_teachable_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from autogen import ConversableAgent, config_list_from_json
from autogen.formatting_utils import colored

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, skip_openai # noqa: E402

try:
from autogen.agentchat.contrib.capabilities.teachability import Teachability
Expand All @@ -29,17 +28,13 @@
filter_dict = {"tags": ["gpt-4o-mini"]}


def create_teachable_agent(reset_db=False, verbosity=0):
def create_teachable_agent(credentials: Credentials, reset_db=False, verbosity=0):
"""Instantiates a teachable agent using the settings from the top of this file."""
# Load LLM inference endpoints from an env variable or a file
# See https://docs.ag2.ai/docs/FAQ#set-your-api-endpoints
# and OAI_CONFIG_LIST_sample
config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, filter_dict=filter_dict, file_location=KEY_LOC)

# Start by instantiating any agent that inherits from ConversableAgent.
teachable_agent = ConversableAgent(
name="teachable_agent",
llm_config={"config_list": config_list, "timeout": 120, "cache_seed": None}, # Disable caching.
llm_config={"config_list": credentials.config_list, "timeout": 120, "cache_seed": None}, # Disable caching.
)

# Instantiate the Teachability capability. Its parameters are all optional.
Expand Down Expand Up @@ -67,11 +62,12 @@ def check_agent_response(teachable_agent, user, correct_answer):
return 0


def use_question_answer_phrasing():
def use_question_answer_phrasing(credentials: Credentials):
"""Tests whether the teachable agent can answer a question after being taught the answer in a previous chat."""
print(colored("\nTEST QUESTION-ANSWER PHRASING", "light_cyan"))
num_errors, num_tests = 0, 0
teachable_agent, teachability = create_teachable_agent(
credentials,
reset_db=True,
verbosity=0, # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
) # For a clean test, clear the agent's memory.
Expand Down Expand Up @@ -101,11 +97,12 @@ def use_question_answer_phrasing():
return num_errors, num_tests


def use_task_advice_pair_phrasing():
def use_task_advice_pair_phrasing(credentials: Credentials):
"""Tests whether the teachable agent can demonstrate a new skill after being taught a task-advice pair in a previous chat."""
print(colored("\nTEST TASK-ADVICE PHRASING", "light_cyan"))
num_errors, num_tests = 0, 0
teachable_agent, teachability = create_teachable_agent(
credentials,
reset_db=True, # For a clean test, clear the teachable agent's memory.
verbosity=3, # 0 for basic info, 1 to add memory operations, 2 for analyzer messages, 3 for memo lists.
)
Expand Down Expand Up @@ -136,17 +133,17 @@ def use_task_advice_pair_phrasing():
skip,
reason="do not run if dependency is not installed or requested to skip",
)
def test_teachability_code_paths():
def test_teachability_code_paths(credentials_gpt_4o_mini: Credentials):
"""Runs this file's unit tests."""
total_num_errors, total_num_tests = 0, 0

num_trials = 1 # Set to a higher number to get a more accurate error rate.
for trial in range(num_trials):
num_errors, num_tests = use_question_answer_phrasing()
num_errors, num_tests = use_question_answer_phrasing(credentials_gpt_4o_mini)
total_num_errors += num_errors
total_num_tests += num_tests

num_errors, num_tests = use_task_advice_pair_phrasing()
num_errors, num_tests = use_task_advice_pair_phrasing(credentials_gpt_4o_mini)
total_num_errors += num_errors
total_num_tests += num_tests

Expand All @@ -167,14 +164,14 @@ def test_teachability_code_paths():
skip,
reason="do not run if dependency is not installed or requested to skip",
)
def test_teachability_accuracy():
def test_teachability_accuracy(credentials_gpt_4o_mini: Credentials):
"""A very cheap and fast test of teachability accuracy."""
print(colored("\nTEST TEACHABILITY ACCURACY", "light_cyan"))

num_trials = 10 # The expected probability of failure is about 0.3 on each trial.
for trial in range(num_trials):
teachable_agent, teachability = create_teachable_agent(
reset_db=True, verbosity=0
credentials_gpt_4o_mini, reset_db=True, verbosity=0
) # For a clean test, clear the agent's memory.
user = ConversableAgent("user", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER")

Expand Down
18 changes: 4 additions & 14 deletions test/agentchat/contrib/capabilities/test_transform_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,18 @@
from autogen.agentchat.contrib.capabilities.transform_messages import TransformMessages
from autogen.agentchat.contrib.capabilities.transforms import MessageHistoryLimiter, MessageTokenLimiter

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, skip_openai # noqa: E402


@pytest.mark.skipif(skip_openai, reason="Requested to skip openai test.")
def test_transform_messages_capability():
def test_transform_messages_capability(credentials_gpt_4o_mini: Credentials) -> None:
"""Test the TransformMessages capability to handle long contexts.

This test is a replica of test_transform_chat_history_with_agents in test_context_handling.py
"""
with tempfile.TemporaryDirectory() as temp_dir:
config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
KEY_LOC,
filter_dict={
"model": "gpt-4o-mini",
},
)

assistant = autogen.AssistantAgent(
"assistant", llm_config={"config_list": config_list}, max_consecutive_auto_reply=1
)
llm_config = credentials_gpt_4o_mini.llm_config
assistant = autogen.AssistantAgent("assistant", llm_config=llm_config, max_consecutive_auto_reply=1)

context_handling = TransformMessages(
transforms=[
Expand Down
15 changes: 4 additions & 11 deletions test/agentchat/contrib/retrievechat/test_pgvector_retrievechat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@

from autogen import AssistantAgent, config_list_from_json

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, reason, skip_openai # noqa: E402

try:
import pgvector
Expand All @@ -35,22 +34,16 @@
skip or skip_openai,
reason="dependency is not installed OR requested to skip",
)
def test_retrievechat():
def test_retrievechat(credentials_gpt_4o_mini: Credentials):
conversations = {}
# ChatCompletion.start_logging(conversations) # deprecated in v0.2

config_list = config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
)

assistant = AssistantAgent(
name="assistant",
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"config_list": config_list,
"config_list": credentials_gpt_4o_mini.config_list,
},
)

Expand All @@ -72,7 +65,7 @@ def test_retrievechat():
],
"custom_text_types": ["non-existent-type"],
"chunk_token_size": 2000,
"model": config_list[0]["model"],
"model": credentials_gpt_4o_mini.config_list[0]["model"],
"vector_db": "pgvector", # PGVector database
"collection_name": "test_collection",
"db_config": {
Expand Down
16 changes: 5 additions & 11 deletions test/agentchat/contrib/retrievechat/test_qdrant_retrievechat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@

import os
import sys
from typing import Generator

import pytest

from autogen import AssistantAgent, config_list_from_json

from ....conftest import skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, reason, skip_openai # noqa: E402

try:
import fastembed
Expand All @@ -37,29 +37,22 @@
else:
skip = False or skip_openai

test_dir = os.path.join(os.path.dirname(__file__), "../../..", "test_files")


@pytest.mark.skipif(
sys.platform in ["darwin", "win32"] or not QDRANT_INSTALLED or skip,
reason="do not run on MacOS or windows OR dependency is not installed OR requested to skip",
)
def test_retrievechat():
def test_retrievechat(credentials_gpt_4o_mini: Credentials):
conversations = {}
# ChatCompletion.start_logging(conversations) # deprecated in v0.2

config_list = config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
)

assistant = AssistantAgent(
name="assistant",
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"config_list": config_list,
"config_list": credentials_gpt_4o_mini.config_list,
},
)

Expand Down Expand Up @@ -99,6 +92,7 @@ def test_qdrant_filter():

@pytest.mark.skipif(not QDRANT_INSTALLED, reason="qdrant_client is not installed")
def test_qdrant_search():
test_dir = os.path.join(os.path.dirname(__file__), "../../..", "test_files")
client = QdrantClient(":memory:")
create_qdrant_from_dir(test_dir, client=client)

Expand Down
16 changes: 4 additions & 12 deletions test/agentchat/contrib/retrievechat/test_retrievechat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@

import pytest

import autogen

from ....conftest import reason, skip_openai # noqa: E402
from ...test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402
from ....conftest import Credentials, reason, skip_openai # noqa: E402

try:
import chromadb
Expand All @@ -36,22 +33,17 @@
sys.platform in ["darwin", "win32"] or skip or skip_openai,
reason=reason,
)
def test_retrievechat():
def test_retrievechat(credentials_gpt_4o_mini: Credentials):
conversations = {}
# autogen.ChatCompletion.start_logging(conversations) # deprecated in v0.2

config_list = autogen.config_list_from_json(
OAI_CONFIG_LIST,
file_location=KEY_LOC,
)

assistant = AssistantAgent(
name="assistant",
system_message="You are a helpful assistant.",
llm_config={
"timeout": 600,
"seed": 42,
"config_list": config_list,
"config_list": credentials_gpt_4o_mini.config_list,
},
)

Expand All @@ -63,7 +55,7 @@ def test_retrievechat():
retrieve_config={
"docs_path": "./website/docs",
"chunk_token_size": 2000,
"model": config_list[0]["model"],
"model": credentials_gpt_4o_mini.config_list[0]["model"],
"client": chromadb.PersistentClient(path="/tmp/chromadb"),
"embedding_function": sentence_transformer_ef,
"get_or_create": True,
Expand Down
Loading
Loading