Skip to content

Commit

Permalink
Merge pull request #434 from dvonthenen/reintroduce-tts-ws
Browse files Browse the repository at this point in the history
Reintroduce TTS WS
  • Loading branch information
davidvonthenen authored Sep 18, 2024
2 parents 8226969 + 78c207b commit 78c7c45
Show file tree
Hide file tree
Showing 31 changed files with 3,136 additions and 217 deletions.
60 changes: 39 additions & 21 deletions deepgram/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,12 @@
from .client import (
SpeakOptions,
SpeakRESTOptions,
# SpeakWebSocketOptions,
SpeakWSOptions,
# FileSource,
SpeakRestSource,
SpeakSource,
)
from .client import SpeakWebSocketEvents
from .client import SpeakWebSocketEvents, SpeakWebSocketMessage

## speak REST
from .client import (
Expand All @@ -122,21 +122,23 @@
SpeakRESTResponse,
)

# ## speak WebSocket
# from .client import (
# SpeakWebSocketClient,
# AsyncSpeakWebSocketClient,
# )
# from .client import (
# SpeakWebSocketResponse,
# # OpenResponse,
# # MetadataResponse,
# FlushedResponse,
# # CloseResponse,
# # UnhandledResponse,
# WarningResponse,
# # ErrorResponse,
# )
## speak WebSocket
from .client import (
SpeakWebSocketClient,
AsyncSpeakWebSocketClient,
SpeakWSClient,
AsyncSpeakWSClient,
)
from .client import (
# OpenResponse,
# MetadataResponse,
FlushedResponse,
ClearedResponse,
# CloseResponse,
# UnhandledResponse,
WarningResponse,
# ErrorResponse,
)

# manage
from .client import ManageClient, AsyncManageClient
Expand Down Expand Up @@ -180,10 +182,26 @@
)

# utilities
# pylint: disable=wrong-import-position
from .audio import Microphone, DeepgramMicrophoneError
from .audio import (
LOGGING,
CHANNELS,
RATE,
CHUNK,
INPUT_LOGGING,
INPUT_CHANNELS,
INPUT_RATE,
INPUT_CHUNK,
)

LOGGING = INPUT_LOGGING
CHANNELS = INPUT_CHANNELS
RATE = INPUT_RATE
CHUNK = INPUT_CHUNK

from .audio import Speaker
from .audio import (
OUTPUT_LOGGING,
OUTPUT_CHANNELS,
OUTPUT_RATE,
OUTPUT_CHUNK,
)

# pylint: enable=wrong-import-position
16 changes: 15 additions & 1 deletion deepgram/audio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,19 @@
# SPDX-License-Identifier: MIT

from .microphone import Microphone
from .microphone import LOGGING, CHANNELS, RATE, CHUNK
from .microphone import DeepgramMicrophoneError
from .microphone import (
LOGGING as INPUT_LOGGING,
CHANNELS as INPUT_CHANNELS,
RATE as INPUT_RATE,
CHUNK as INPUT_CHUNK,
)

from .speaker import Speaker
from .speaker import DeepgramSpeakerError
from .speaker import (
LOGGING as OUTPUT_LOGGING,
CHANNELS as OUTPUT_CHANNELS,
RATE as OUTPUT_RATE,
CHUNK as OUTPUT_CHUNK,
)
1 change: 0 additions & 1 deletion deepgram/audio/microphone/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from ...utils import verboselogs

# Constants for microphone

LOGGING = verboselogs.WARNING
CHANNELS = 1
RATE = 16000
Expand Down
83 changes: 60 additions & 23 deletions deepgram/audio/microphone/microphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import inspect
import asyncio
import threading
from typing import Optional, Callable, TYPE_CHECKING
from typing import Optional, Callable, Union, TYPE_CHECKING
import logging

from ...utils import verboselogs
Expand All @@ -21,10 +21,10 @@ class Microphone: # pylint: disable=too-many-instance-attributes
"""

_logger: verboselogs.VerboseLogger
_exit: threading.Event

_audio: "pyaudio.PyAudio"
_stream: "pyaudio.Stream"

_chunk: int
_rate: int
_format: int
Expand All @@ -34,9 +34,10 @@ class Microphone: # pylint: disable=too-many-instance-attributes

_asyncio_loop: asyncio.AbstractEventLoop
_asyncio_thread: threading.Thread
_exit: threading.Event

_push_callback_org: object
_push_callback: object
_push_callback_org: Optional[Callable] = None
_push_callback: Optional[Callable] = None

def __init__(
self,
Expand All @@ -53,6 +54,7 @@ def __init__(
self._logger = verboselogs.VerboseLogger(__name__)
self._logger.addHandler(logging.StreamHandler())
self._logger.setLevel(verbose)

self._exit = threading.Event()

self._audio = pyaudio.PyAudio()
Expand All @@ -71,9 +73,16 @@ def _start_asyncio_loop(self) -> None:

def is_active(self) -> bool:
"""
returns True if the stream is active, False otherwise
is_active - returns the state of the stream
Args:
None
Returns:
True if the stream is active, False otherwise
"""
self._logger.debug("Microphone.is_active ENTER")

if self._stream is None:
self._logger.error("stream is None")
self._logger.debug("Microphone.is_active LEAVE")
Expand All @@ -87,24 +96,34 @@ def is_active(self) -> bool:

def set_callback(self, push_callback: Callable) -> None:
"""
Set the callback function to be called when data is received.
set_callback - sets the callback function to be called when data is received.
Args:
push_callback (Callable): The callback function to be called when data is received.
This should be the websocket send function.
Returns:
None
"""
self._push_callback_org = push_callback

def start(self) -> bool:
"""
starts the microphone stream
start - starts the microphone stream
Returns:
bool: True if the stream was started, False otherwise
"""
self._logger.debug("Microphone.start ENTER")

self._logger.info("format: %s", self._format)
self._logger.info("channels: %d", self._channels)
self._logger.info("rate: %d", self._rate)
self._logger.info("chunk: %d", self._chunk)
self._logger.info("input_device_id: %d", self._input_device_index)
# self._logger.info("input_device_id: %d", self._input_device_index)

if self._push_callback_org is None:
self._logger.error("start() failed. No callback set.")
self._logger.error("start failed. No callback set.")
self._logger.debug("Microphone.start LEAVE")
return False

Expand All @@ -114,9 +133,13 @@ def start(self) -> bool:
self._asyncio_thread = threading.Thread(target=self._start_asyncio_loop)
self._asyncio_thread.start()

self._push_callback = lambda data: asyncio.run_coroutine_threadsafe(
self._push_callback_org(data), self._asyncio_loop
).result()
self._push_callback = lambda data: (
asyncio.run_coroutine_threadsafe(
self._push_callback_org(data), self._asyncio_loop
).result()
if self._push_callback_org
else None
)
else:
self._logger.verbose("regular threaded callback")
self._push_callback = self._push_callback_org
Expand All @@ -134,7 +157,7 @@ def start(self) -> bool:
self._exit.clear()
self._stream.start_stream()

self._logger.notice("start() succeeded")
self._logger.notice("start succeeded")
self._logger.debug("Microphone.start LEAVE")
return True

Expand Down Expand Up @@ -176,41 +199,50 @@ def _callback(

def mute(self) -> bool:
"""
Mutes the microphone stream
mute - mutes the microphone stream
Returns:
bool: True if the stream was muted, False otherwise
"""
self._logger.debug("Microphone.mute ENTER")

if self._stream is None:
self._logger.error("mute() failed. Library not initialized.")
self._logger.error("mute failed. Library not initialized.")
self._logger.debug("Microphone.mute LEAVE")
return False

self._is_muted = True

self._logger.notice("mute() succeeded")
self._logger.notice("mute succeeded")
self._logger.debug("Microphone.mute LEAVE")
return True

def unmute(self) -> bool:
"""
Unmutes the microphone stream
unmute - unmutes the microphone stream
Returns:
bool: True if the stream was unmuted, False otherwise
"""
self._logger.debug("Microphone.unmute ENTER")

if self._stream is None:
self._logger.error("unmute() failed. Library not initialized.")
self._logger.error("unmute failed. Library not initialized.")
self._logger.debug("Microphone.unmute LEAVE")
return False

self._is_muted = False

self._logger.notice("unmute() succeeded")
self._logger.notice("unmute succeeded")
self._logger.debug("Microphone.unmute LEAVE")
return True

def finish(self) -> bool:
"""
Stops the microphone stream
finish - stops the microphone stream
Returns:
bool: True if the stream was stopped, False otherwise
"""
self._logger.debug("Microphone.finish ENTER")

Expand All @@ -219,19 +251,24 @@ def finish(self) -> bool:

# Stop the stream.
if self._stream is not None:
self._logger.notice("stopping stream...")
self._stream.stop_stream()
self._stream.close()
self._stream = None # type: ignore
self._logger.notice("stream stopped")

# clean up the thread
if (
inspect.iscoroutinefunction(self._push_callback_org)
and self._asyncio_thread is not None
# inspect.iscoroutinefunction(self._push_callback_org)
# and
self._asyncio_thread
is not None
):
self._logger.notice("stopping asyncio loop...")
self._asyncio_loop.call_soon_threadsafe(self._asyncio_loop.stop)
self._asyncio_thread.join()
self._asyncio_thread = None # type: ignore
self._logger.notice("stream/recv thread joined")
self._logger.notice("_asyncio_thread joined")

self._logger.notice("finish succeeded")
self._logger.debug("Microphone.finish LEAVE")
Expand Down
7 changes: 7 additions & 0 deletions deepgram/audio/speaker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

from .speaker import Speaker
from .errors import DeepgramSpeakerError
from .constants import LOGGING, CHANNELS, RATE, CHUNK
12 changes: 12 additions & 0 deletions deepgram/audio/speaker/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT

from ...utils import verboselogs

# Constants for speaker
# LOGGING, CHANNELS, RATE and CHUNK are re-exported by the speaker package's
# __init__.py; TIMEOUT is not part of that re-export.
LOGGING = verboselogs.WARNING
# NOTE(review): presumably a duration in seconds (50 ms) - confirm against the code that reads it.
TIMEOUT = 0.050
CHANNELS = 1
RATE = 16000
# NOTE(review): 8194 is not a power of two; confirm whether 8192 was intended.
CHUNK = 8194
21 changes: 21 additions & 0 deletions deepgram/audio/speaker/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT


# exceptions for speaker
class DeepgramSpeakerError(Exception):
    """
    Error type raised for known failures related to the Speaker library.

    Attributes:
        name (str): Fixed label "DeepgramSpeakerError", used as the prefix
            of the string form of the error.
        message (str): The error message describing the exception.
    """

    def __init__(self, message: str):
        # Record the attributes first, then hand the message to Exception so
        # that ``args`` is populated in the usual way.
        self.message = message
        self.name = "DeepgramSpeakerError"
        super().__init__(message)

    def __str__(self):
        # Rendered as "<name>: <message>".
        return "{}: {}".format(self.name, self.message)
Loading

0 comments on commit 78c7c45

Please sign in to comment.