From e29e6c549992619b0081f84f3252983e7a17529e Mon Sep 17 00:00:00 2001 From: digitallysavvy Date: Thu, 26 Sep 2024 17:29:48 -0400 Subject: [PATCH 1/4] updated title --- open-ai-integration/overview/product-overview.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/open-ai-integration/overview/product-overview.mdx b/open-ai-integration/overview/product-overview.mdx index 6115ab28c..29bd593d8 100644 --- a/open-ai-integration/overview/product-overview.mdx +++ b/open-ai-integration/overview/product-overview.mdx @@ -7,7 +7,7 @@ description: > --- Integrating Agora’s real-time audio communication with OpenAI’s Large Language Models (LLMs) unlocks the potential for powerful, interactive voice-based applications. By combining Agora’s robust real-time audio streaming capabilities with the conversational intelligence of OpenAI’s LLMs, you can create seamless voice-enabled experiences, such as voice-powered AI assistants or interactive dialogue systems. This integration enables dynamic, responsive audio interactions, enhancing user engagement across a broad range of use cases—from customer support bots to collaborative voice-driven applications. Most importantly, by combining the strengths of Agora and OpenAI, this integration enables the most natural form of language interaction, lowering the barrier for users to harness the power of AI and making advanced technologies more accessible than ever before. 
+ From eb9226506d02f1c65e518edb874d73bbe3ccec9b Mon Sep 17 00:00:00 2001 From: digitallysavvy Date: Thu, 26 Sep 2024 17:37:19 -0400 Subject: [PATCH 2/4] updated env vars --- shared/open-ai-integration/quickstart.mdx | 141 +++++++++++----------- 1 file changed, 70 insertions(+), 71 deletions(-) diff --git a/shared/open-ai-integration/quickstart.mdx b/shared/open-ai-integration/quickstart.mdx index 3ee35c773..0c3c7c34b 100644 --- a/shared/open-ai-integration/quickstart.mdx +++ b/shared/open-ai-integration/quickstart.mdx @@ -24,41 +24,42 @@ Follow these steps to set up your Python integration project: 1. Create a new folder for the project. - ```bash - mkdir realtime-agent - cd realtime-agent/ + ```bash + mkdir realtime-agent + cd realtime-agent/ - ``` + ``` 1. Create the following structure for your project: - ``` - /realtime-agent - ├── __init__.py - ├── .env - ├── agent.py - ├── agora - │   ├── __init__.py - │   ├── requirements.txt - │   └── rtc.py - └── realtimeapi - ├── __init__.py - ├── client.py - ├── messages.py - └── util.py - ``` - - - This project uses the OpenAI [`realtimeapi-examples`](https://openai.com/api/) package.Download the project and unzip it into your `realtime-agent` folder. - - - The following descriptions provide an overview of the key files in the project: - - - `agent.py`: The primary script responsible for executing the `RealtimeKitAgent`. It integrates Agora's functionality from the `agora/rtc.py` module and OpenAI's capabilities from the `realtimeapi` package. - - `agora/rtc.py`: Contains an implementation of the server-side Agora Python Voice SDK. - - `realtimeapi/`: Contains the classes and methods that interact with OpenAI’s Realtime API. - - The [Complete code](#complete-integration-code) for `agent.py` and `rtc.py` is provided at the bottom of this page. 
+ ``` + /realtime-agent + ├── __init__.py + ├── .env + ├── agent.py + ├── agora + │   ├── __init__.py + │   ├── requirements.txt + │   └── rtc.py + └── realtimeapi + ├── __init__.py + ├── client.py + ├── messages.py + └── util.py + ``` + + + This project uses the OpenAI [`realtimeapi-examples`](https://openai.com/api/) package. Download the project and unzip it into your + `realtime-agent` folder. + + + The following descriptions provide an overview of the key files in the project: + + - `agent.py`: The primary script responsible for executing the `RealtimeKitAgent`. It integrates Agora's functionality from the `agora/rtc.py` module and OpenAI's capabilities from the `realtimeapi` package. + - `agora/rtc.py`: Contains an implementation of the server-side Agora Python Voice SDK. + - `realtimeapi/`: Contains the classes and methods that interact with OpenAI’s Realtime API. + + The [Complete code](#complete-integration-code) for `agent.py` and `rtc.py` is provided at the bottom of this page. 
It ensures that the connection is properly managed and cleaned up after use, even in cases of exceptions, early exits, or shutdowns. -UIDs in the Python SDK are set using a string value. Agora recommends using only numerical values for UID strings to ensure compatibility with all Agora products and extensions. + UIDs in the Python SDK are set using a string value. Agora recommends using only numerical values for UID strings to ensure compatibility + with all Agora products and extensions. ```python @@ -363,14 +362,14 @@ logger = logging.getLogger(**name**) @dataclass(frozen=True, kw_only=True) class InferenceConfig: - """Configuration for the inference process.""" - system_message: str | None = None - turn_detection: messages.TurnDetectionTypes | None = None - voice: messages.Voices | None = None +"""Configuration for the inference process.""" +system_message: str | None = None +turn_detection: messages.TurnDetectionTypes | None = None +voice: messages.Voices | None = None @dataclass(frozen=True, kw_only=True) class LocalFunctionToolDeclaration: - """Declaration of a tool that can be called by the model, and runs a function locally on the tool context.""" +"""Declaration of a tool that can be called by the model, and runs a function locally on the tool context.""" name: str description: str @@ -389,7 +388,7 @@ class LocalFunctionToolDeclaration: @dataclass(frozen=True, kw_only=True) class PassThroughFunctionToolDeclaration: - """Declaration of a tool that can be called by the model, and is passed through the LiveKit client.""" +"""Declaration of a tool that can be called by the model, and is passed through the LiveKit client.""" name: str description: str @@ -411,19 +410,19 @@ ToolDeclaration = LocalFunctionToolDeclaration | PassThroughFunctionToolDeclarat @dataclass(frozen=True, kw_only=True) class LocalToolCallExecuted: - json_encoded_output: str +json_encoded_output: str @dataclass(frozen=True, kw_only=True) class ShouldPassThroughToolCall: - 
decoded_function_args: dict[str, Any] +decoded_function_args: dict[str, Any] # Type alias for tool execution results ExecuteToolCallResult = LocalToolCallExecuted | ShouldPassThroughToolCall class ToolContext(abc.ABC): - """Abstract base class for managing tool declarations and executions.""" - _tool_declarations: dict[str, ToolDeclaration] +"""Abstract base class for managing tool declarations and executions.""" +\_tool_declarations: dict[str, ToolDeclaration] def __init__(self) -> None: # TODO: This should be an ordered dict @@ -481,18 +480,18 @@ class ToolContext(abc.ABC): return [v.model_description() for v in self._tool_declarations.values()] class ClientToolCallResponse(BaseModel): - tool_call_id: str - result: dict[str, Any] | str | float | int | bool | None = None +tool_call_id: str +result: dict[str, Any] | str | float | int | bool | None = None class RealtimeKitAgent: - """Main agent class for handling real-time communication and processing.""" - engine: RtcEngine - channel: Channel - client: RealtimeApiClient - audio_queue: asyncio.Queue[bytes] = asyncio.Queue() - message_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDelta] = asyncio.Queue() - message_done_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDone] = asyncio.Queue() - tools: ToolContext | None = None +"""Main agent class for handling real-time communication and processing.""" +engine: RtcEngine +channel: Channel +client: RealtimeApiClient +audio_queue: asyncio.Queue[bytes] = asyncio.Queue() +message_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDelta] = asyncio.Queue() +message_done_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDone] = asyncio.Queue() +tools: ToolContext | None = None _client_tool_futures: dict[str, asyncio.Future[ClientToolCallResponse]] @@ -690,12 +689,12 @@ class RealtimeKitAgent: logger.warning(f"Unhandled message type: {message=}") async def shutdown(loop, signal=None): - """Gracefully shut down the application.""" - if signal: - 
print(f"Received exit signal {signal.name}...") +"""Gracefully shut down the application.""" +if signal: +print(f"Received exit signal {signal.name}...") tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] - + print(f"Cancelling {len(tasks)} outstanding tasks") for task in tasks: task.cancel() @@ -703,22 +702,22 @@ async def shutdown(loop, signal=None): await asyncio.gather(*tasks, return_exceptions=True) loop.stop() -if __name__ == "__main__": # Load environment variables and run the agent - load_dotenv() - asyncio.run( - RealtimeKitAgent.entry_point( - engine=RtcEngine(appid="aab8b8f5a8cd4469a63042fcfafe7063"), - inference_config=InferenceConfig( - system_message="""\\ +if **name** == "**main**": # Load environment variables and run the agent +load_dotenv() +asyncio.run( +RealtimeKitAgent.entry_point( +engine=RtcEngine(appid="aab8b8f5a8cd4469a63042fcfafe7063"), +inference_config=InferenceConfig( +system_message="""\\ You are a helpful assistant. If asked about the weather, make sure to use the provided tool to get that information. 
\\ If you are asked a question that requires a tool, say something like "working on that" and don't provide a concrete response \\ until you have received the response to the tool call.\\ """, - voice=messages.Voices.Alloy, - turn_detection=messages.TurnDetectionTypes.SERVER_VAD, - ), - ) - ) +voice=messages.Voices.Alloy, +turn_detection=messages.TurnDetectionTypes.SERVER_VAD, +), +) +) `} From dfc6d35f65fb908a33738d6cfb093731fda8c6bc Mon Sep 17 00:00:00 2001 From: digitallysavvy Date: Thu, 26 Sep 2024 17:46:13 -0400 Subject: [PATCH 3/4] use env var --- shared/open-ai-integration/quickstart.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/open-ai-integration/quickstart.mdx b/shared/open-ai-integration/quickstart.mdx index 0c3c7c34b..7def74ef5 100644 --- a/shared/open-ai-integration/quickstart.mdx +++ b/shared/open-ai-integration/quickstart.mdx @@ -706,7 +706,7 @@ if **name** == "**main**": # Load environment variables and run the agent load_dotenv() asyncio.run( RealtimeKitAgent.entry_point( -engine=RtcEngine(appid="aab8b8f5a8cd4469a63042fcfafe7063"), +engine=RtcEngine(appid=os.getenv("AGORA_APP_ID")), inference_config=InferenceConfig( system_message="""\\ You are a helpful assistant. If asked about the weather, make sure to use the provided tool to get that information. 
\\ From 2d7fbe3fe5fede30f1578a2bd9e00cb08023c100 Mon Sep 17 00:00:00 2001 From: saudsami Date: Fri, 27 Sep 2024 03:39:09 +0500 Subject: [PATCH 4/4] fixed code indentation --- shared/open-ai-integration/quickstart.mdx | 68 +++++++++++------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/shared/open-ai-integration/quickstart.mdx b/shared/open-ai-integration/quickstart.mdx index 7def74ef5..f2e4d9b4c 100644 --- a/shared/open-ai-integration/quickstart.mdx +++ b/shared/open-ai-integration/quickstart.mdx @@ -362,14 +362,14 @@ logger = logging.getLogger(**name**) @dataclass(frozen=True, kw_only=True) class InferenceConfig: -"""Configuration for the inference process.""" -system_message: str | None = None -turn_detection: messages.TurnDetectionTypes | None = None -voice: messages.Voices | None = None + """Configuration for the inference process.""" + system_message: str | None = None + turn_detection: messages.TurnDetectionTypes | None = None + voice: messages.Voices | None = None @dataclass(frozen=True, kw_only=True) class LocalFunctionToolDeclaration: -"""Declaration of a tool that can be called by the model, and runs a function locally on the tool context.""" + """Declaration of a tool that can be called by the model, and runs a function locally on the tool context.""" name: str description: str @@ -388,7 +388,7 @@ class LocalFunctionToolDeclaration: @dataclass(frozen=True, kw_only=True) class PassThroughFunctionToolDeclaration: -"""Declaration of a tool that can be called by the model, and is passed through the LiveKit client.""" + """Declaration of a tool that can be called by the model, and is passed through the LiveKit client.""" name: str description: str @@ -410,19 +410,19 @@ ToolDeclaration = LocalFunctionToolDeclaration | PassThroughFunctionToolDeclarat @dataclass(frozen=True, kw_only=True) class LocalToolCallExecuted: -json_encoded_output: str + json_encoded_output: str @dataclass(frozen=True, kw_only=True) class ShouldPassThroughToolCall: 
-decoded_function_args: dict[str, Any] + decoded_function_args: dict[str, Any] # Type alias for tool execution results ExecuteToolCallResult = LocalToolCallExecuted | ShouldPassThroughToolCall class ToolContext(abc.ABC): -"""Abstract base class for managing tool declarations and executions.""" -\_tool_declarations: dict[str, ToolDeclaration] + """Abstract base class for managing tool declarations and executions.""" + _tool_declarations: dict[str, ToolDeclaration] def __init__(self) -> None: # TODO: This should be an ordered dict @@ -480,18 +480,18 @@ class ToolContext(abc.ABC): return [v.model_description() for v in self._tool_declarations.values()] class ClientToolCallResponse(BaseModel): -tool_call_id: str -result: dict[str, Any] | str | float | int | bool | None = None + tool_call_id: str + result: dict[str, Any] | str | float | int | bool | None = None class RealtimeKitAgent: -"""Main agent class for handling real-time communication and processing.""" -engine: RtcEngine -channel: Channel -client: RealtimeApiClient -audio_queue: asyncio.Queue[bytes] = asyncio.Queue() -message_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDelta] = asyncio.Queue() -message_done_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDone] = asyncio.Queue() -tools: ToolContext | None = None + """Main agent class for handling real-time communication and processing.""" + engine: RtcEngine + channel: Channel + client: RealtimeApiClient + audio_queue: asyncio.Queue[bytes] = asyncio.Queue() + message_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDelta] = asyncio.Queue() + message_done_queue: asyncio.Queue[messages.ResonseAudioTranscriptionDone] = asyncio.Queue() + tools: ToolContext | None = None _client_tool_futures: dict[str, asyncio.Future[ClientToolCallResponse]] @@ -689,9 +689,9 @@ tools: ToolContext | None = None logger.warning(f"Unhandled message type: {message=}") async def shutdown(loop, signal=None): -"""Gracefully shut down the application.""" -if signal: 
-print(f"Received exit signal {signal.name}...") + """Gracefully shut down the application.""" + if signal: + print(f"Received exit signal {signal.name}...") tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] @@ -703,21 +703,21 @@ print(f"Received exit signal {signal.name}...") loop.stop() if **name** == "**main**": # Load environment variables and run the agent -load_dotenv() -asyncio.run( -RealtimeKitAgent.entry_point( -engine=RtcEngine(appid=os.getenv("AGORA_APP_ID")), -inference_config=InferenceConfig( -system_message="""\\ + load_dotenv() + asyncio.run( + RealtimeKitAgent.entry_point( + engine=RtcEngine(appid=os.getenv("AGORA_APP_ID")), + inference_config=InferenceConfig( + system_message="""\\ You are a helpful assistant. If asked about the weather, make sure to use the provided tool to get that information. \\ If you are asked a question that requires a tool, say something like "working on that" and don't provide a concrete response \\ until you have received the response to the tool call.\\ """, -voice=messages.Voices.Alloy, -turn_detection=messages.TurnDetectionTypes.SERVER_VAD, -), -) -) + voice=messages.Voices.Alloy, + turn_detection=messages.TurnDetectionTypes.SERVER_VAD, + ), + ) + ) `}