chore: update version to 0.4.0
add: function_min_similarity, the minimum similarity score a function matched from embeddings must reach to be used
add: function_max_tokens, which controls the maximum number of tokens function definitions can use; functions are dropped once the limit is exceeded
add: max_event_size: some functions can return megabytes of data as an event; this can now be limited to a maximum number of bytes
chore: refactor event handling
chore: update readme
Lee Huffman committed Jan 5, 2024
1 parent 96e43a1 commit ec7d2af
Showing 6 changed files with 98 additions and 28 deletions.
27 changes: 27 additions & 0 deletions README.md
@@ -128,6 +128,26 @@ can be passed to customize the agent's behavior. Below is a detailed description
- **Type**: `int`
- **Default**: `3`

#### `function_min_similarity`

- **Description**: The minimum similarity score for a function to be considered when handling a query.
- **Type**: `float`
- **Default**: `0.5`

#### `function_max_tokens`

- **Description**: The maximum number of tokens to allow for function definitions. This is useful for preventing
function definitions from consuming a large number of tokens, which lowers costs and reduces AI errors. Set to `0` for
unlimited token usage.
- **Type**: `int`
- **Default**: `2000`

#### `use_tool_calls`

- **Description**: Whether to use the new OpenAI tool calls instead of the now-deprecated function calls.
- **Type**: `bool`
- **Default**: `True`

#### `system_message`

- **Description**: A system message that sets the context for the agent.
@@ -182,6 +202,13 @@ can be passed to customize the agent's behavior. Below is a detailed description
- **Type**: `bool`
- **Default**: `False`

#### `max_event_size`

- **Description**: The maximum size of an event in bytes. Limits how much data a function response can send back as
a single event.
- **Type**: `int`
- **Default**: `2000`

### Example of Initialization

Here's an example of how you might initialize a `CompletionAgent` with some of these parameters:
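A minimal sketch (the import path and the example function are illustrative assumptions; the parameter names follow the constructor in `nimbusagent/agent/base.py` shown below):

```python
from nimbusagent.agent.completion import CompletionAgent  # import path assumed


def get_weather(city: str) -> str:
    """Hypothetical function the agent can call."""
    return f"Sunny in {city}"


agent = CompletionAgent(
    openai_api_key="sk-...",        # or set the OPENAI_API_KEY environment variable
    functions=[get_weather],
    functions_k_closest=3,
    function_min_similarity=0.5,    # ignore weak embedding matches
    function_max_tokens=2000,       # cap tokens spent on function definitions (0 = unlimited)
    use_tool_calls=True,            # OpenAI tool calls instead of deprecated function calls
    send_events=True,
    max_event_size=2000,            # cap a single event payload at 2000 bytes
)
```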
28 changes: 24 additions & 4 deletions nimbusagent/agent/base.py
@@ -32,6 +32,9 @@ def __init__(
functions_always_use: Optional[List[str]] = None,
functions_pattern_groups: Optional[List[dict]] = None,
functions_k_closest: int = 3,
function_min_similarity: float = 0.5,

function_max_tokens: int = 2000,
use_tool_calls: bool = True,

system_message: str = SYS_MSG,
@@ -50,6 +53,7 @@ def __init__(
loops_max: int = 10,

send_events: bool = False,
max_event_size: int = 2000,
):
"""
Base Agent Class for Nimbus Agent
@@ -62,8 +66,10 @@
functions: The list of functions to use (default: None)
functions_embeddings: The list of function embeddings to use (default: None)
functions_pattern_groups: The list of function pattern groups to use (default: None)
functions_k_closest: The number of closest functions to use (default: 3)
functions_k_closest: The number of closest embedding functions to use (default: 3)
function_min_similarity: The minimum similarity to use for embedding functions (default: 0.5)
functions_always_use: The list of functions to always use (default: None)
function_max_tokens: The maximum number of tokens to allow for function definitions (default: 2000). 0 = unlimited
use_tool_calls: True to use OpenAI tool calls, which allow parallel function calls (default: True). Function calls
are deprecated, but tool calls are still somewhat new, so for now this can be set to
False to continue using function calls.
@@ -80,6 +86,7 @@ def __init__(
internal_thoughts_max_entries: The maximum number of entries to store in the internal thoughts (default: 3)
loops_max: The maximum number of loops to allow (default: 10)
send_events: True if events should be sent (default: False)
max_event_size: The maximum size of an event (default: 2000)
"""

self.client = OpenAI(api_key=openai_api_key if openai_api_key is not None else os.getenv("OPENAI_API_KEY"))
@@ -101,6 +108,7 @@ def __init__(
self.moderation_fail_message = moderation_fail_message
self.loops_max = loops_max
self.send_events = send_events
self.max_event_size = max_event_size
self.calling_function_start_callback = calling_function_start_callback
self.calling_function_stop_callback = calling_function_stop_callback

@@ -110,8 +118,14 @@ def __init__(
raise ValueError('The message history contains inappropriate content.')
self.chat_history.set_chat_history(message_history)

self.function_handler = self._init_function_handler(functions, functions_embeddings, functions_k_closest,
functions_always_use, functions_pattern_groups)
self.function_handler = self._init_function_handler(
functions=functions,
functions_embeddings=functions_embeddings,
functions_k_closest=functions_k_closest,
functions_always_use=functions_always_use,
functions_pattern_groups=functions_pattern_groups,
function_max_tokens=function_max_tokens,
function_min_similarity=function_min_similarity)
self.use_tool_calls = use_tool_calls

def set_system_message(self, message: str) -> None:
@@ -122,8 +136,10 @@ def set_system_message(self, message: str) -> None:

def _init_function_handler(self, functions: Optional[List], functions_embeddings: Optional[List],
functions_k_closest: int = 3,
function_min_similarity: float = 0.5,
functions_always_use: Optional[List[str]] = None,
functions_pattern_groups: Optional[List[dict]] = None) -> FunctionHandler:
functions_pattern_groups: Optional[List[dict]] = None,
function_max_tokens: int = 0) -> FunctionHandler:
"""Initializes the function handler.
Returns a FunctionHandler instance.
@@ -139,10 +155,12 @@ def _init_function_handler(self, functions: Optional[List], functions_embeddings
functions=functions,
embeddings=functions_embeddings,
k_nearest=functions_k_closest,
min_similarity=function_min_similarity,
always_use=functions_always_use,
pattern_groups=functions_pattern_groups,
calling_function_start_callback=self.calling_function_start_callback,
calling_function_stop_callback=self.calling_function_stop_callback,
max_tokens=function_max_tokens,
chat_history=self.chat_history
)

@@ -167,6 +185,7 @@ def _create_chat_completion(

if use_functions and self.function_handler.functions and not force_no_functions:
if self.use_tool_calls:
# noinspection PyTypeChecker
res = self.client.chat.completions.create(
model=model_name,
temperature=self.temperature,
@@ -175,6 +194,7 @@
tool_choice=function_call,
stream=stream)
else:
# noinspection PyTypeChecker
res = self.client.chat.completions.create(
model=model_name,
temperature=self.temperature,
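For context, a sketch of the two request shapes the `use_tool_calls` flag switches between; the function schema and model name are illustrative, while `tools`/`tool_choice` and the deprecated `functions`/`function_call` parameters come from the OpenAI chat completions API:

```python
from openai import OpenAI

client = OpenAI()
weather_schema = {  # illustrative function definition
    "name": "get_weather",
    "description": "Get the current weather for a city",
    "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
}
messages = [{"role": "user", "content": "What's the weather in Paris?"}]

# use_tool_calls=True: the newer tools API, which also supports parallel calls
res = client.chat.completions.create(
    model="gpt-4",
    messages=messages,
    tools=[{"type": "function", "function": weather_schema}],
    tool_choice="auto",
)

# use_tool_calls=False: the deprecated function-call API
res = client.chat.completions.create(
    model="gpt-4",
    messages=messages,
    functions=[weather_schema],
    function_call="auto",
)
```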
28 changes: 20 additions & 8 deletions nimbusagent/agent/streaming.py
@@ -5,6 +5,9 @@

from nimbusagent.agent.base import BaseAgent, HAVING_TROUBLE_MSG

EVENT_TYPE_FUNCTION = "function"
EVENT_TYPE_DATA = "data"


class StreamingAgent(BaseAgent):
"""Agent that streams responses to the user and can hanldle openai function calls.
@@ -56,6 +59,18 @@ def output_post_content(post_content: List[str]):
return f"{' '.join(post_content)}\n"
return ""

def output_event(event_type: str, name: str, data: any):

if not data:
return f"[[[{event_type}:{name}]]]\n"

if not isinstance(data, str):
data = json.dumps(data)
if len(data) > self.max_event_size:
data = '{"error":"data too large"}'

return f"[[[{event_type}:{name}:{data}]]]\n"

loops = 0
post_content_items = []
use_secondary_model = False
@@ -138,15 +153,13 @@ def output_post_content(post_content: List[str]):
func_args = tool_call['function']["arguments"]

if self.send_events:
json_data = json.dumps(func_args)
yield f"[[[function:{func_name}:{json_data}]]]"
yield output_event(EVENT_TYPE_FUNCTION, func_name, func_args)

func_results = self.function_handler.handle_function_call(func_name, func_args)
if func_results is not None:
if func_results.stream_data and self.send_events:
for key, value in func_results.stream_data.items():
json_value = json.dumps(value)
yield f"[[[data:{key}:{json_value}]]]"
yield output_event(EVENT_TYPE_DATA, key, value)

if func_results.send_directly_to_user and func_results.content:
content_send_directly_to_user.append(func_results.content)
@@ -174,8 +187,8 @@ def output_post_content(post_content: List[str]):

elif finish_reason == "function_call":
if self.send_events:
json_data = json.dumps(self.function_handler.get_args(func_call['arguments']))
yield f"[[[function:{func_call['name']}:{json_data}]]]"
yield output_event(EVENT_TYPE_FUNCTION, func_call['name'],
json.dumps(self.function_handler.get_args(func_call['arguments'])))

# Handle function call
logging.info("Handling function call: %s", func_call)
@@ -184,8 +197,7 @@ def output_post_content(post_content: List[str]):
if func_results is not None:
if func_results.stream_data and self.send_events:
for key, value in func_results.stream_data.items():
json_value = json.dumps(value)
yield f"[[[data:{key}:{json_value}]]]"
yield output_event(EVENT_TYPE_DATA, key, value)

if func_results.send_directly_to_user and func_results.content:
yield func_results.content
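For consumers of the stream, a minimal sketch of parsing the `[[[type:name:data]]]` events yielded above; the regex and splitting logic are illustrative assumptions, not part of this commit:

```python
import json
import re

# Matches [[[function:name]]], [[[function:name:{"a": 1}]]], [[[data:key:...]]]
EVENT_RE = re.compile(r"\[\[\[(?P<type>\w+):(?P<name>\w+)(?::(?P<data>.*?))?\]\]\]", re.DOTALL)


def split_stream_chunk(chunk: str):
    """Separate event markers from plain assistant text in one streamed chunk."""
    events = []
    for match in EVENT_RE.finditer(chunk):
        raw = match.group("data")
        events.append((match.group("type"), match.group("name"), json.loads(raw) if raw else None))
    text = EVENT_RE.sub("", chunk)  # whatever remains is normal content
    return text, events


text, events = split_stream_chunk('[[[function:get_weather:{"city": "Paris"}]]]\nLooking that up...')
print(events)  # [('function', 'get_weather', {'city': 'Paris'})]
```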
15 changes: 10 additions & 5 deletions nimbusagent/functions/handler.py
@@ -51,23 +51,28 @@ class FunctionHandler:
pattern_groups = None
chat_history: AgentMemory = None
processed_functions = None
max_tokens = 0

def __init__(self, functions: list = None,
embeddings: list = None,
k_nearest: int = 3,
min_similarity: float = 0.5,
always_use: list = None,
pattern_groups: list = None,
calling_function_start_callback: callable = None,
calling_function_stop_callback: callable = None,
chat_history: AgentMemory = None
chat_history: AgentMemory = None,
max_tokens: int = 0
):

self.embeddings = embeddings
self.k_nearest = k_nearest
self.min_similarity = min_similarity
self.always_use = always_use
self.pattern_groups = pattern_groups
self.chat_history = chat_history
self.encoding = tiktoken.get_encoding("cl100k_base")
self.max_tokens = max_tokens

self.orig_functions = {func.__name__: func for func in functions} if functions else None
if not embeddings:
@@ -176,12 +181,11 @@ def _set_functions_and_mappings(self, functions: Optional[List[FunctionInfo]] =
self.functions = None
self.func_mapping = None

def get_functions_from_query_and_history(self, query: str, history: List[Dict[str, Any]], max_tokens: int = 2250):
def get_functions_from_query_and_history(self, query: str, history: List[Dict[str, Any]]):
"""
Get the functions to use based on the query and history.
:param query: The query to use.
:param history: The history to use. A list of dictionaries with 'role' and 'content' fields.
:param max_tokens: The maximum number of tokens to use. Defaults to 2250.
"""
if not self.orig_functions:
return None
@@ -216,7 +220,7 @@ def get_functions_from_query_and_history(self, query: str, history: List[Dict[st
if query_group_functions:
actual_function_names = combine_lists_unique(actual_function_names, query_group_functions)

logging.info(f"Functions to use: {actual_function_names}")
logging.info(f"Actual Functions Names to use: {actual_function_names}")
# step 5: step through functions and get the function info, adding up to max_tokens

processed_functions = []
@@ -226,11 +230,12 @@ def get_functions_from_query_and_history(self, query: str, history: List[Dict[st
if func_info:
processed_functions.append(func_info)
token_count += func_info.tokens
if token_count >= max_tokens:
if 0 < self.max_tokens <= token_count:
break

self.processed_functions = processed_functions
using_functions = [func.name for func in processed_functions]
logging.info(f"query: {query}")
logging.info(f"Using functions: {using_functions}")
logging.info(f"Total tokens: {token_count}")

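A toy illustration of the token budget applied above; the function names and token counts are invented, and the real handler measures tokens with `tiktoken`:

```python
# Invented (name, token_count) pairs standing in for FunctionInfo entries
candidates = [("get_weather", 800), ("get_news", 900), ("get_stocks", 700), ("get_jokes", 400)]
function_max_tokens = 2000  # 0 would mean no limit

selected, token_count = [], 0
for name, tokens in candidates:
    selected.append(name)
    token_count += tokens
    if 0 < function_max_tokens <= token_count:
        break  # stop adding functions once the budget is spent

print(selected)     # ['get_weather', 'get_news', 'get_stocks']
print(token_count)  # 2400 -- the function that crosses the cap is still included
```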
26 changes: 16 additions & 10 deletions nimbusagent/utils/helper.py
@@ -60,15 +60,18 @@ def cosine_similarity(list1, list2):
:param list2: The second vector.
:return: The cosine similarity of the two vectors.
"""
return 1 - dot(list1, list2) / (norm(list1) * norm(list2))
return dot(list1, list2) / (norm(list1) * norm(list2))


def find_similar_embedding_list(query: str, function_embeddings: list, k_nearest_neighbors: int = 1):
"""Return the k function descriptions most similar (least cosine distance) to given query
def find_similar_embedding_list(query: str, function_embeddings: list, k_nearest_neighbors: int = 1,
min_similarity: float = 0.5):
"""
Return the k function descriptions most similar to given query.
:param query: The query to check.
:param function_embeddings: The list of function embeddings to compare to.
:param k_nearest_neighbors: The number of nearest neighbors to return.
:return: The k function descriptions most similar (least cosine distance) to given query
:param min_similarity: The minimum cosine similarity to consider a function relevant.
:return: The k function descriptions most similar to given query.
"""
if not function_embeddings or len(function_embeddings) == 0 or not query:
return None
@@ -77,15 +80,18 @@ def find_similar_embedding_list(query: str, function_embeddings: list, k_nearest
if not query_embedding:
return None

distances = []
similarities = []
for function_embedding in function_embeddings:
dist = cosine_similarity(query_embedding, function_embedding['embedding'])
distances.append(
{'name': function_embedding['name'], 'distance': dist})
similarity = cosine_similarity(query_embedding, function_embedding['embedding'])
if similarity >= min_similarity:
similarities.append({'name': function_embedding['name'], 'similarity': similarity})

# Sort the results by similarity in descending order (most similar first)
sorted_similarities = sorted(similarities, key=lambda x: x['similarity'], reverse=True)

sorted_distances = sorted(distances, key=lambda x: x['distance'])
# Return the top k nearest neighbors
return sorted_similarities[:k_nearest_neighbors]

return sorted_distances[:k_nearest_neighbors]


def combine_lists_unique(list1: Iterable[Any], set2: Union[Iterable[Any], set]) -> List[Any]:
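To illustrate the corrected similarity math and the new `min_similarity` cutoff, a small self-contained example; the vectors are made up, and `numpy` stands in for the `dot`/`norm` helpers used in `helper.py`:

```python
import numpy as np


def cosine_similarity(a, b):
    # Same formula as helper.py after this commit: 1.0 = same direction, 0.0 = orthogonal
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


query = np.array([0.9, 0.1, 0.0])
function_embeddings = {
    "get_weather": np.array([0.8, 0.2, 0.1]),      # points roughly the same way as the query
    "get_stock_price": np.array([0.0, 0.1, 0.9]),  # mostly unrelated
}

min_similarity = 0.5
matches = sorted(
    ((name, cosine_similarity(query, vec)) for name, vec in function_embeddings.items()),
    key=lambda item: item[1],
    reverse=True,  # most similar first, as in find_similar_embedding_list
)
relevant = [(name, round(score, 3)) for name, score in matches if score >= min_similarity]
print(relevant)  # only get_weather clears the 0.5 threshold
```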
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "nimbusagent"
version = "0.3.1"
version = "0.4.0"
description = "An OpenAI agent with basic memory, functions, and moderation support"
readme = "README.md"
license = { file = "LICENSE.txt" }