diff --git a/README.md b/README.md index 4094ecf..1931152 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,12 @@ can be passed to customize the agent's behavior. Below is a detailed description - **Type**: `Optional[List[dict]]` - **Default**: `None` +#### `functions_embeddings_model` + +- **Description**: The name of the OpenAI model to use for generating embeddings for the functions. +- **Type**: `str` +- **Default**: `'text-embedding-ada-002'` + #### `functions_always_use` - **Description**: Functions that should always be used by the agent. diff --git a/nimbusagent/agent/base.py b/nimbusagent/agent/base.py index f6749d4..5a7afb7 100644 --- a/nimbusagent/agent/base.py +++ b/nimbusagent/agent/base.py @@ -6,7 +6,7 @@ from nimbusagent.functions.handler import FunctionHandler from nimbusagent.memory.base import AgentMemory -from nimbusagent.utils.helper import is_query_safe +from nimbusagent.utils.helper import is_query_safe, FUNCTIONS_EMBEDDING_MODEL SYS_MSG = """You are a helpful assistant.""" @@ -29,6 +29,7 @@ def __init__( functions: Optional[list] = None, functions_embeddings: Optional[List[dict]] = None, + functions_embeddings_model: str = FUNCTIONS_EMBEDDING_MODEL, functions_always_use: Optional[List[str]] = None, functions_pattern_groups: Optional[List[dict]] = None, functions_k_closest: int = 3, @@ -65,6 +66,7 @@ def __init__( temperature: The temperature for the response sampling (default: 0.1) functions: The list of functions to use (default: None) functions_embeddings: The list of function embeddings to use (default: None) + functions_embeddings_model: The model to use for function embeddings (default: 'text-embedding-ada-002') functions_pattern_groups: The list of function pattern groups to use (default: None) functions_k_closest: The number of closest embedding functions to use (default: 3) function_min_similarity: The minimum similarity to use for embedding functions (default: 0.5) @@ -121,6 +123,7 @@ def __init__( self.function_handler = 
self._init_function_handler( functions=functions, functions_embeddings=functions_embeddings, + functions_embeddings_model=functions_embeddings_model, functions_k_closest=functions_k_closest, functions_always_use=functions_always_use, functions_pattern_groups=functions_pattern_groups, @@ -134,7 +137,10 @@ def set_system_message(self, message: str) -> None: """ self.system_message = {"role": "system", "content": message} - def _init_function_handler(self, functions: Optional[List], functions_embeddings: Optional[List], + def _init_function_handler(self, + functions: Optional[List], + functions_embeddings: Optional[List], + functions_embeddings_model: str = FUNCTIONS_EMBEDDING_MODEL, functions_k_closest: int = 3, function_min_similarity: float = 0.5, functions_always_use: Optional[List[str]] = None, @@ -154,6 +160,7 @@ def _init_function_handler(self, functions: Optional[List], functions_embeddings return FunctionHandler( functions=functions, embeddings=functions_embeddings, + embeddings_model=functions_embeddings_model, k_nearest=functions_k_closest, min_similarity=function_min_similarity, always_use=functions_always_use, diff --git a/nimbusagent/functions/handler.py b/nimbusagent/functions/handler.py index 307cbd3..375c846 100644 --- a/nimbusagent/functions/handler.py +++ b/nimbusagent/functions/handler.py @@ -12,7 +12,7 @@ from nimbusagent.functions import parser from nimbusagent.functions.responses import FuncResponse, DictFuncResponse from nimbusagent.memory.base import AgentMemory -from nimbusagent.utils.helper import find_similar_embedding_list, combine_lists_unique +from nimbusagent.utils.helper import find_similar_embedding_list, combine_lists_unique, FUNCTIONS_EMBEDDING_MODEL @dataclass @@ -55,6 +55,7 @@ class FunctionHandler: def __init__(self, functions: list = None, embeddings: list = None, + embeddings_model: str = FUNCTIONS_EMBEDDING_MODEL, k_nearest: int = 3, min_similarity: float = 0.5, always_use: list = None, @@ -66,6 +67,7 @@ def __init__(self, 
functions: list = None, ): self.embeddings = embeddings + self.embeddings_model = embeddings_model self.k_nearest = k_nearest self.min_similarity = min_similarity self.always_use = always_use @@ -210,6 +212,7 @@ def get_functions_from_query_and_history(self, query: str, history: List[Dict[st if self.embeddings: similar_functions = find_similar_embedding_list(recent_history_and_query_str, function_embeddings=self.embeddings, + embeddings_model=self.embeddings_model, k_nearest_neighbors=self.k_nearest) similar_function_names = [d['name'] for d in similar_functions] if similar_function_names: diff --git a/nimbusagent/utils/helper.py b/nimbusagent/utils/helper.py index 2ebde4f..a96f5d8 100644 --- a/nimbusagent/utils/helper.py +++ b/nimbusagent/utils/helper.py @@ -2,8 +2,7 @@ import os from typing import Union, List, Iterable, Any -from numpy import dot -from numpy.linalg import norm +import numpy as np from openai import OpenAI FUNCTIONS_EMBEDDING_MODEL = "text-embedding-ada-002" @@ -38,7 +37,7 @@ def is_query_safe(query: str, api_key=None) -> bool: def get_embedding(text, model=FUNCTIONS_EMBEDDING_MODEL, api_key=None): """Returns the embedding of the given text. :param text: The text to get the embedding of. - :param model: The model to use. Defaults to the text-embedding-ada-002 model. + :param model: The model to use. Defaults to FUNCTIONS_EMBEDDING_MODEL ('text-embedding-ada-002'). :param api_key: The OpenAI API key to use. Uses the OPENAI_API_KEY environment variable if not provided. :return: The embedding of the given text. """ @@ -54,19 +53,23 @@ def get_embedding(text, model=FUNCTIONS_EMBEDDING_MODEL, api_key=None): return None -def cosine_similarity(list1, list2): +def cosine_similarity(a, b): """ get cosine similarity of two vector of same dimensions - :param list1: The first vector. - :param list2: The second vector. + :param a: The first vector. + :param b: The second vector. :return: The cosine similarity of the two vectors. 
""" - return dot(list1, list2) / (norm(list1) * norm(list2)) + return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) -def find_similar_embedding_list(query: str, function_embeddings: list, k_nearest_neighbors: int = 1, - min_similarity: float = 0.5): +def find_similar_embedding_list(query: str, + function_embeddings: list, + embeddings_model: str = FUNCTIONS_EMBEDDING_MODEL, + k_nearest_neighbors: int = 1, + min_similarity: float = 0.1): """ Return the k function descriptions most similar to given query. + :param embeddings_model: The name of the embedding model used to embed the query. :param query: The query to check. :param function_embeddings: The list of function embeddings to compare to. :param k_nearest_neighbors: The number of nearest neighbors to return. @@ -76,7 +79,7 @@ def find_similar_embedding_list(query: str, function_embeddings: list, k_nearest if not function_embeddings or len(function_embeddings) == 0 or not query: return None - query_embedding = get_embedding(query) + query_embedding = get_embedding(query, model=embeddings_model) if not query_embedding: return None @@ -93,7 +96,6 @@ def find_similar_embedding_list(query: str, function_embeddings: list, k_nearest return sorted_similarities[:k_nearest_neighbors] - def combine_lists_unique(list1: Iterable[Any], set2: Union[Iterable[Any], set]) -> List[Any]: """Combine two lists, removing duplicates. :param list1: The first list. diff --git a/pyproject.toml b/pyproject.toml index a2f0fd5..fcdb3a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "nimbusagent" -version = "0.4.1" +version = "0.4.3" description = "An OpenAI agent with basic memory, functions, and moderation support" readme = "README.md" license = { file = "LICENSE.txt" }