Agent API Early Access

deepgram · Nov 7, 2024 · 51f595d · 51f595d
1 parent aff6110
commit 51f595d
Show file tree

Hide file tree

Showing 20 changed files with 2,653 additions and 16 deletions.
diff --git a/deepgram/__init__.py b/deepgram/__init__.py
@@ -34,7 +34,7 @@
 from .errors import DeepgramApiKeyError
 
 # listen/read client
-from .client import Listen, Read
+from .client import ListenRouter, ReadRouter, SpeakRouter, AgentRouter
 
 # common
 from .client import (
@@ -302,6 +302,57 @@
     AsyncSelfHostedClient,
 )
 
+
+# agent
+from .client import AgentWebSocketEvents
+
+# websocket
+from .client import (
+    AgentWebSocketClient,
+    AsyncAgentWebSocketClient,
+)
+
+from .client import (
+    #### common websocket response
+    # OpenResponse,
+    # CloseResponse,
+    # ErrorResponse,
+    # UnhandledResponse,
+    #### unique
+    WelcomeResponse,
+    SettingsAppliedResponse,
+    ConversationTextResponse,
+    UserStartedSpeakingResponse,
+    AgentThinkingResponse,
+    FunctionCallingResponse,
+    AgentStartedSpeakingResponse,
+    AgentAudioDoneResponse,
+    EndOfThoughtResponse,
+)
+
+from .client import (
+    # top level
+    SettingsConfigurationOptions,
+    UpdateInstructionsOptions,
+    UpdateSpeakOptions,
+    InjectAgentMessageOptions,
+    # sub level
+    Listen,
+    Speak,
+    Header,
+    Item,
+    Properties,
+    Parameters,
+    Function,
+    Provider,
+    Think,
+    Agent,
+    Input,
+    Output,
+    Audio,
+    Context,
+)
+
 # utilities
 # pylint: disable=wrong-import-position
 from .audio import Microphone, DeepgramMicrophoneError

diff --git a/deepgram/audio/microphone/microphone.py b/deepgram/audio/microphone/microphone.py
@@ -9,6 +9,7 @@
 import logging
 
 from ...utils import verboselogs
+
 from .constants import LOGGING, CHANNELS, RATE, CHUNK
 
 if TYPE_CHECKING:

diff --git a/deepgram/audio/speaker/speaker.py b/deepgram/audio/speaker/speaker.py
@@ -50,7 +50,6 @@ class Speaker:  # pylint: disable=too-many-instance-attributes
     # _asyncio_loop: asyncio.AbstractEventLoop
     # _asyncio_thread: threading.Thread
     _receiver_thread: Optional[threading.Thread] = None
-
     _loop: Optional[asyncio.AbstractEventLoop] = None
 
     _push_callback_org: Optional[Callable] = None
@@ -265,6 +264,7 @@ async def _start_asyncio_receiver(self):
                     await self._push_callback(message)
                 elif isinstance(message, bytes):
                     self._logger.verbose("Received audio data...")
+                    await self._push_callback(message)
                     self.add_audio_to_queue(message)
         except websockets.exceptions.ConnectionClosedOK as e:
             self._logger.debug("send() exiting gracefully: %d", e.code)
@@ -297,6 +297,7 @@ def _start_threaded_receiver(self):
                     self._push_callback(message)
                 elif isinstance(message, bytes):
                     self._logger.verbose("Received audio data...")
+                    self._push_callback(message)
                     self.add_audio_to_queue(message)
         except Exception as e:  # pylint: disable=broad-except
             self._logger.notice("_start_threaded_receiver exception: %s", str(e))
@@ -365,6 +366,7 @@ def _play(self, audio_out, stream, stop):
                                 "LastPlay delta is greater than threshold. Unmute!"
                             )
                             self._microphone.unmute()
+
                 data = audio_out.get(True, TIMEOUT)
                 with self._lock_wait:
                     self._last_datagram = datetime.now()

diff --git a/deepgram/client.py b/deepgram/client.py
@@ -55,7 +55,7 @@
 )
 
 # listen client
-from .clients import Listen, Read, Speak
+from .clients import ListenRouter, ReadRouter, SpeakRouter, AgentRouter
 
 # speech-to-text
 from .clients import LiveClient, AsyncLiveClient  # backward compat
@@ -308,6 +308,58 @@
     AsyncSelfHostedClient,
 )
 
+
+# agent
+from .clients import AgentWebSocketEvents
+
+# websocket
+from .clients import (
+    AgentWebSocketClient,
+    AsyncAgentWebSocketClient,
+)
+
+from .clients import (
+    #### common websocket response
+    # OpenResponse,
+    # CloseResponse,
+    # ErrorResponse,
+    # UnhandledResponse,
+    #### unique
+    WelcomeResponse,
+    SettingsAppliedResponse,
+    ConversationTextResponse,
+    UserStartedSpeakingResponse,
+    AgentThinkingResponse,
+    FunctionCallingResponse,
+    AgentStartedSpeakingResponse,
+    AgentAudioDoneResponse,
+    EndOfThoughtResponse,
+)
+
+from .clients import (
+    # top level
+    SettingsConfigurationOptions,
+    UpdateInstructionsOptions,
+    UpdateSpeakOptions,
+    InjectAgentMessageOptions,
+    # sub level
+    Listen,
+    Speak,
+    Header,
+    Item,
+    Properties,
+    Parameters,
+    Function,
+    Provider,
+    Think,
+    Agent,
+    Input,
+    Output,
+    Audio,
+    Context,
+)
+
+
 # client errors and options
 from .options import DeepgramClientOptions, ClientOptionsFromEnv
 from .errors import DeepgramApiKeyError
@@ -397,21 +449,21 @@ def listen(self):
         """
         Returns a Listen dot-notation router for interacting with Deepgram's transcription services.
         """
-        return Listen(self._config)
+        return ListenRouter(self._config)
 
     @property
     def read(self):
         """
         Returns a Read dot-notation router for interacting with Deepgram's read services.
         """
-        return Read(self._config)
+        return ReadRouter(self._config)
 
     @property
     def speak(self):
         """
         Returns a Speak dot-notation router for interacting with Deepgram's speak services.
         """
-        return Speak(self._config)
+        return SpeakRouter(self._config)
 
     @property
     @deprecation.deprecated(
@@ -480,6 +532,13 @@ def asyncselfhosted(self):
         """
         return self.Version(self._config, "asyncselfhosted")
 
+    @property
+    def agent(self):
+        """
+        Returns an Agent dot-notation router for interacting with Deepgram's agent services.
+        """
+        return AgentRouter(self._config)
+
     # INTERNAL CLASSES
     class Version:
         """

diff --git a/deepgram/clients/__init__.py b/deepgram/clients/__init__.py
@@ -48,9 +48,10 @@
 )
 from .errors import DeepgramModuleError
 
-from .listen_router import Listen
-from .read_router import Read
-from .speak_router import Speak
+from .listen_router import ListenRouter
+from .read_router import ReadRouter
+from .speak_router import SpeakRouter
+from .agent_router import AgentRouter
 
 # listen
 from .listen import LiveTranscriptionEvents
@@ -318,3 +319,53 @@
     SelfHostedClient,
     AsyncSelfHostedClient,
 )
+
+# agent
+from .agent import AgentWebSocketEvents
+
+# websocket
+from .agent import (
+    AgentWebSocketClient,
+    AsyncAgentWebSocketClient,
+)
+
+from .agent import (
+    #### common websocket response
+    # OpenResponse,
+    # CloseResponse,
+    # ErrorResponse,
+    # UnhandledResponse,
+    #### unique
+    WelcomeResponse,
+    SettingsAppliedResponse,
+    ConversationTextResponse,
+    UserStartedSpeakingResponse,
+    AgentThinkingResponse,
+    FunctionCallingResponse,
+    AgentStartedSpeakingResponse,
+    AgentAudioDoneResponse,
+    EndOfThoughtResponse,
+)
+
+from .agent import (
+    # top level
+    SettingsConfigurationOptions,
+    UpdateInstructionsOptions,
+    UpdateSpeakOptions,
+    InjectAgentMessageOptions,
+    # sub level
+    Listen,
+    Speak,
+    Header,
+    Item,
+    Properties,
+    Parameters,
+    Function,
+    Provider,
+    Think,
+    Agent,
+    Input,
+    Output,
+    Audio,
+    Context,
+)
diff --git a/deepgram/clients/agent/__init__.py b/deepgram/clients/agent/__init__.py
@@ -0,0 +1,52 @@
+# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+from .enums import AgentWebSocketEvents
+
+# websocket
+from .client import (
+    AgentWebSocketClient,
+    AsyncAgentWebSocketClient,
+)
+
+from .client import (
+    #### common websocket response
+    OpenResponse,
+    CloseResponse,
+    ErrorResponse,
+    UnhandledResponse,
+    #### unique
+    WelcomeResponse,
+    SettingsAppliedResponse,
+    ConversationTextResponse,
+    UserStartedSpeakingResponse,
+    AgentThinkingResponse,
+    FunctionCallingResponse,
+    AgentStartedSpeakingResponse,
+    AgentAudioDoneResponse,
+    EndOfThoughtResponse,
+)
+
+from .client import (
+    # top level
+    SettingsConfigurationOptions,
+    UpdateInstructionsOptions,
+    UpdateSpeakOptions,
+    InjectAgentMessageOptions,
+    # sub level
+    Listen,
+    Speak,
+    Header,
+    Item,
+    Properties,
+    Parameters,
+    Function,
+    Provider,
+    Think,
+    Agent,
+    Input,
+    Output,
+    Audio,
+    Context,
+)