Skip to content

Commit

Permalink
Agent API Early Access
Browse files Browse the repository at this point in the history
  • Loading branch information
davidvonthenen committed Nov 7, 2024
1 parent aff6110 commit 51f595d
Show file tree
Hide file tree
Showing 20 changed files with 2,653 additions and 16 deletions.
53 changes: 52 additions & 1 deletion deepgram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from .errors import DeepgramApiKeyError

# listen/read/speak/agent routers
from .client import Listen, Read
from .client import ListenRouter, ReadRouter, SpeakRouter, AgentRouter

# common
from .client import (
Expand Down Expand Up @@ -302,6 +302,57 @@
AsyncSelfHostedClient,
)


# agent
from .client import AgentWebSocketEvents

# websocket
from .client import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .client import (
#### common websocket response
# OpenResponse,
# CloseResponse,
# ErrorResponse,
# UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCallingResponse,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
EndOfThoughtResponse,
)

from .client import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)

# utilities
# pylint: disable=wrong-import-position
from .audio import Microphone, DeepgramMicrophoneError
Expand Down
1 change: 1 addition & 0 deletions deepgram/audio/microphone/microphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging

from ...utils import verboselogs

from .constants import LOGGING, CHANNELS, RATE, CHUNK

if TYPE_CHECKING:
Expand Down
4 changes: 3 additions & 1 deletion deepgram/audio/speaker/speaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ class Speaker: # pylint: disable=too-many-instance-attributes
# _asyncio_loop: asyncio.AbstractEventLoop
# _asyncio_thread: threading.Thread
_receiver_thread: Optional[threading.Thread] = None

_loop: Optional[asyncio.AbstractEventLoop] = None

_push_callback_org: Optional[Callable] = None
Expand Down Expand Up @@ -265,6 +264,7 @@ async def _start_asyncio_receiver(self):
await self._push_callback(message)
elif isinstance(message, bytes):
self._logger.verbose("Received audio data...")
await self._push_callback(message)
self.add_audio_to_queue(message)
except websockets.exceptions.ConnectionClosedOK as e:
self._logger.debug("send() exiting gracefully: %d", e.code)
Expand Down Expand Up @@ -297,6 +297,7 @@ def _start_threaded_receiver(self):
self._push_callback(message)
elif isinstance(message, bytes):
self._logger.verbose("Received audio data...")
self._push_callback(message)
self.add_audio_to_queue(message)
except Exception as e: # pylint: disable=broad-except
self._logger.notice("_start_threaded_receiver exception: %s", str(e))
Expand Down Expand Up @@ -365,6 +366,7 @@ def _play(self, audio_out, stream, stop):
"LastPlay delta is greater than threshold. Unmute!"
)
self._microphone.unmute()

data = audio_out.get(True, TIMEOUT)
with self._lock_wait:
self._last_datagram = datetime.now()
Expand Down
67 changes: 63 additions & 4 deletions deepgram/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
)

# listen/read/speak/agent routers
from .clients import Listen, Read, Speak
from .clients import ListenRouter, ReadRouter, SpeakRouter, AgentRouter

# speech-to-text
from .clients import LiveClient, AsyncLiveClient # backward compat
Expand Down Expand Up @@ -308,6 +308,58 @@
AsyncSelfHostedClient,
)


# agent
from .clients import AgentWebSocketEvents

# websocket
from .clients import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .clients import (
#### common websocket response
# OpenResponse,
# CloseResponse,
# ErrorResponse,
# UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCallingResponse,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
EndOfThoughtResponse,
)

from .clients import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)


# client errors and options
from .options import DeepgramClientOptions, ClientOptionsFromEnv
from .errors import DeepgramApiKeyError
Expand Down Expand Up @@ -397,21 +449,21 @@ def listen(self):
"""
Returns a Listen dot-notation router for interacting with Deepgram's transcription services.
"""
return Listen(self._config)
return ListenRouter(self._config)

@property
def read(self):
"""
Returns a Read dot-notation router for interacting with Deepgram's read services.
"""
return Read(self._config)
return ReadRouter(self._config)

@property
def speak(self):
"""
Returns a Speak dot-notation router for interacting with Deepgram's speak services.
"""
return Speak(self._config)
return SpeakRouter(self._config)

@property
@deprecation.deprecated(
Expand Down Expand Up @@ -480,6 +532,13 @@ def asyncselfhosted(self):
"""
return self.Version(self._config, "asyncselfhosted")

@property
def agent(self):
"""
Returns an Agent dot-notation router for interacting with Deepgram's agent services.
"""
return AgentRouter(self._config)

# INTERNAL CLASSES
class Version:
"""
Expand Down
57 changes: 54 additions & 3 deletions deepgram/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@
)
from .errors import DeepgramModuleError

from .listen_router import Listen
from .read_router import Read
from .speak_router import Speak
from .listen_router import ListenRouter
from .read_router import ReadRouter
from .speak_router import SpeakRouter
from .agent_router import AgentRouter

# listen
from .listen import LiveTranscriptionEvents
Expand Down Expand Up @@ -318,3 +319,53 @@
SelfHostedClient,
AsyncSelfHostedClient,
)

# agent
from .agent import AgentWebSocketEvents

# websocket
from .agent import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .agent import (
#### common websocket response
# OpenResponse,
# CloseResponse,
# ErrorResponse,
# UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCallingResponse,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
EndOfThoughtResponse,
)

from .agent import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)
52 changes: 52 additions & 0 deletions deepgram/clients/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

from .enums import AgentWebSocketEvents

# websocket
from .client import (
AgentWebSocketClient,
AsyncAgentWebSocketClient,
)

from .client import (
#### common websocket response
OpenResponse,
CloseResponse,
ErrorResponse,
UnhandledResponse,
#### unique
WelcomeResponse,
SettingsAppliedResponse,
ConversationTextResponse,
UserStartedSpeakingResponse,
AgentThinkingResponse,
FunctionCallingResponse,
AgentStartedSpeakingResponse,
AgentAudioDoneResponse,
EndOfThoughtResponse,
)

from .client import (
# top level
SettingsConfigurationOptions,
UpdateInstructionsOptions,
UpdateSpeakOptions,
InjectAgentMessageOptions,
# sub level
Listen,
Speak,
Header,
Item,
Properties,
Parameters,
Function,
Provider,
Think,
Agent,
Input,
Output,
Audio,
Context,
)
Loading

0 comments on commit 51f595d

Please sign in to comment.