diff --git a/mypy.ini b/mypy.ini index d27ab5b..f5a3979 100644 --- a/mypy.ini +++ b/mypy.ini @@ -20,6 +20,3 @@ warn_unreachable = True strict_equality = True strict = True - -[mypy-edge_tts.voices] -disallow_any_decorated = False diff --git a/src/edge_tts/communicate.py b/src/edge_tts/communicate.py index c053950..0ad9ebb 100644 --- a/src/edge_tts/communicate.py +++ b/src/edge_tts/communicate.py @@ -11,7 +11,6 @@ from io import TextIOWrapper from queue import Queue from typing import ( - Any, AsyncGenerator, ContextManager, Dict, @@ -26,7 +25,8 @@ import aiohttp import certifi -from .constants import SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL +from .constants import DEFAULT_VOICE, SEC_MS_GEC_VERSION, WSS_HEADERS, WSS_URL +from .data_classes import TTSConfig from .drm import DRM from .exceptions import ( NoAudioReceived, @@ -34,8 +34,7 @@ UnknownResponse, WebSocketError, ) -from .models import TTSConfig -from .typing import TTSChunk +from .typing import CommunicateState, TTSChunk def get_headers_and_data( @@ -109,7 +108,7 @@ def split_text_by_byte_length( text will be inside of an XML tag. Args: - text (str or bytes): The string to be split. + text (str or bytes): The string to be split. If bytes, it must be UTF-8 encoded. byte_length (int): The maximum byte length of each string in the list. Yield: @@ -166,12 +165,9 @@ def mkssml(tc: TTSConfig, escaped_text: Union[str, bytes]) -> str: Returns: str: The SSML string. """ - - # If the text is bytes, convert it to a string. if isinstance(escaped_text, bytes): escaped_text = escaped_text.decode("utf-8") - # Return the SSML string. return ( "" f"" @@ -244,7 +240,7 @@ class Communicate: def __init__( self, text: str, - voice: str = "en-US-EmmaMultilingualNeural", + voice: str = DEFAULT_VOICE, *, rate: str = "+0%", volume: str = "+0%", @@ -290,8 +286,8 @@ def __init__( self.connector: Optional[aiohttp.BaseConnector] = connector # Store current state of TTS. - self.state: Dict[str, Any] = { - "partial_text": None, + self.state: CommunicateState = { + "partial_text": b"", "offset_compensation": 0, "last_duration_offset": 0, "stream_was_called": False, diff --git a/src/edge_tts/constants.py b/src/edge_tts/constants.py index 779044f..9395a06 100644 --- a/src/edge_tts/constants.py +++ b/src/edge_tts/constants.py @@ -6,6 +6,8 @@ WSS_URL = f"wss://{BASE_URL}/edge/v1?TrustedClientToken={TRUSTED_CLIENT_TOKEN}" VOICE_LIST = f"https://{BASE_URL}/voices/list?trustedclienttoken={TRUSTED_CLIENT_TOKEN}" +DEFAULT_VOICE = "en-US-EmmaMultilingualNeural" + CHROMIUM_FULL_VERSION = "130.0.2849.68" CHROMIUM_MAJOR_VERSION = CHROMIUM_FULL_VERSION.split(".", maxsplit=1)[0] SEC_MS_GEC_VERSION = f"1-{CHROMIUM_FULL_VERSION}" diff --git a/src/edge_tts/models.py b/src/edge_tts/data_classes.py similarity index 87% rename from src/edge_tts/models.py rename to src/edge_tts/data_classes.py index f28096b..703fe33 100644 --- a/src/edge_tts/models.py +++ b/src/edge_tts/data_classes.py @@ -1,6 +1,8 @@ -"""This module contains the TTSConfig dataclass, which represents the -internal TTS configuration for edge-tts's Communicate class.""" +"""Data models for edge-tts.""" +# pylint: disable=too-few-public-methods + +import argparse import re from dataclasses import dataclass @@ -69,3 +71,18 @@ def __post_init__(self) -> None: self.validate_string_param("rate", self.rate, r"^[+-]\d+%$") self.validate_string_param("volume", self.volume, r"^[+-]\d+%$") self.validate_string_param("pitch", self.pitch, r"^[+-]\d+Hz$") + + +class UtilArgs(argparse.Namespace): + """CLI arguments.""" + + text: str + file: str + voice: str + list_voices: bool + rate: str + volume: str + pitch: str + write_media: str + write_subtitles: str + proxy: str diff --git a/src/edge_tts/typing.py b/src/edge_tts/typing.py index 68b23f4..225293d 100644 --- a/src/edge_tts/typing.py +++ b/src/edge_tts/typing.py @@ -78,15 +78,24 @@ class Voice(TypedDict): VoiceTag: VoiceTag -class VoiceManagerVoice(Voice): - """Voice data for VoiceManager.""" +class VoicesManagerVoice(Voice): + """Voice data for VoicesManager.""" Language: str -class VoiceManagerFind(TypedDict): - """Voice data for VoiceManager.find().""" +class VoicesManagerFind(TypedDict): + """Voice data for VoicesManager.find().""" Gender: NotRequired[Literal["Female", "Male"]] Locale: NotRequired[str] Language: NotRequired[str] + + +class CommunicateState(TypedDict): + """Communicate state data.""" + + partial_text: bytes + offset_compensation: float + last_duration_offset: float + stream_was_called: bool diff --git a/src/edge_tts/util.py b/src/edge_tts/util.py index f16ee21..df54b55 100644 --- a/src/edge_tts/util.py +++ b/src/edge_tts/util.py @@ -3,14 +3,16 @@ import argparse import asyncio import sys -from typing import Any, Optional, TextIO +from typing import Optional, TextIO from tabulate import tabulate from . import Communicate, SubMaker, list_voices +from .constants import DEFAULT_VOICE +from .data_classes import UtilArgs -async def _print_voices(*, proxy: str) -> None: +async def _print_voices(*, proxy: Optional[str]) -> None: """Print all available voices.""" voices = await list_voices(proxy=proxy) voices = sorted(voices, key=lambda voice: voice["ShortName"]) @@ -27,7 +29,7 @@ async def _print_voices(*, proxy: str) -> None: print(tabulate(table, headers)) -async def _run_tts(args: Any) -> None: +async def _run_tts(args: UtilArgs) -> None: """Run TTS after parsing arguments from command line.""" try: @@ -84,15 +86,17 @@ async def _run_tts(args: Any) -> None: async def amain() -> None: """Async main function""" - parser = argparse.ArgumentParser(description="Microsoft Edge TTS") + parser = argparse.ArgumentParser( + description="Text-to-speech using Microsoft Edge's online TTS service." + ) group = parser.add_mutually_exclusive_group(required=True) group.add_argument("-t", "--text", help="what TTS will say") group.add_argument("-f", "--file", help="same as --text but read from file") parser.add_argument( "-v", "--voice", - help="voice for TTS. Default: en-US-AriaNeural", - default="en-US-AriaNeural", + help=f"voice for TTS. Default: {DEFAULT_VOICE}", + default=DEFAULT_VOICE, ) group.add_argument( "-l", @@ -111,7 +115,7 @@ async def amain() -> None: help="send subtitle output to provided file instead of stderr", ) parser.add_argument("--proxy", help="use a proxy for TTS and voice list.") - args = parser.parse_args() + args = parser.parse_args(namespace=UtilArgs()) if args.list_voices: await _print_voices(proxy=args.proxy) diff --git a/src/edge_tts/voices.py b/src/edge_tts/voices.py index 2754788..81a5448 100644 --- a/src/edge_tts/voices.py +++ b/src/edge_tts/voices.py @@ -3,7 +3,7 @@ import json import ssl -from typing import Any, List, Optional +from typing import List, Optional import aiohttp import certifi @@ -11,7 +11,7 @@ from .constants import SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST from .drm import DRM -from .typing import Voice, VoiceManagerFind, VoiceManagerVoice +from .typing import Voice, VoicesManagerFind, VoicesManagerVoice async def __list_voices( @@ -91,12 +91,12 @@ class VoicesManager: """ def __init__(self) -> None: - self.voices: List[VoiceManagerVoice] = [] + self.voices: List[VoicesManagerVoice] = [] self.called_create: bool = False @classmethod async def create( - cls: Any, custom_voices: Optional[List[Voice]] = None + cls, custom_voices: Optional[List[Voice]] = None ) -> "VoicesManager": """ Creates a VoicesManager object and populates it with all available voices. @@ -109,7 +109,7 @@ async def create( self.called_create = True return self - def find(self, **kwargs: Unpack[VoiceManagerFind]) -> List[VoiceManagerVoice]: + def find(self, **kwargs: Unpack[VoicesManagerFind]) -> List[VoicesManagerVoice]: """ Finds all matching voices based on the provided attributes. """