From 218e4e9eb588c7cb8e9bc9d657312f7bbffed0e4 Mon Sep 17 00:00:00 2001 From: MrPandir Date: Tue, 13 Feb 2024 11:13:33 +0100 Subject: [PATCH 1/6] feat: add InvalidSampleRate exception --- exceptions.py | 10 +++++++++- tts.py | 8 ++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/exceptions.py b/exceptions.py index 9f23c18..8153e6f 100644 --- a/exceptions.py +++ b/exceptions.py @@ -1,11 +1,19 @@ class NotFoundModelException(Exception): def __init__(self, speaker_name: str): + self.speaker_name = speaker_name super().__init__(f"Model not found for speaker: {speaker_name}") class NotCorrectTextException(Exception): def __init__(self, text: str): + self.text = text super().__init__(f"Text not correct: {text}") class TextTooLongException(Exception): def __init__(self, text: str): - super().__init__(f"Text too long. Length is {len(text)}. Max length is 930 symbols.") \ No newline at end of file + self.text = text + super().__init__(f"Text too long. Length is {len(text)}. Max length is 930 symbols.") + +class InvalidSampleRateException(Exception): + def __init__(self, sample_rate: int) -> None: + self.sample_rate = sample_rate + super().__init__(f"Invalid sample rate {sample_rate}. 
Supported sample rates are 8 000, 24 000, and 48 000.") diff --git a/tts.py b/tts.py index cee3135..f7f16cd 100644 --- a/tts.py +++ b/tts.py @@ -1,11 +1,11 @@ from typing import TYPE_CHECKING from pathlib import Path +from io import BytesIO import torch from torch.package import PackageImporter -from io import BytesIO -from exceptions import NotFoundModelException, NotCorrectTextException, TextTooLongException +from exceptions import * if TYPE_CHECKING: from .typing.package import TTSModelMultiAcc_v3 @@ -21,6 +21,8 @@ class TTS: + VALID_SAMPLE_RATES = (8000, 24000, 48000) + def __init__(self): self.models: dict[str, "TTSModelMultiAcc_v3"] = {} self.speakers: dict[str, list[str]] = {} @@ -33,6 +35,8 @@ def generate(self, text: str, speaker: str, sample_rate: int) -> bytes: model = self.model_by_speaker.get(speaker) if model is None: raise NotFoundModelException(speaker) + if not sample_rate in self.VALID_SAMPLE_RATES: + raise InvalidSampleRateException(sample_rate) return self._generate_audio(model, text, speaker, sample_rate) From 1f61482e4b6d6f9659e8d2e761e9329c75446267 Mon Sep 17 00:00:00 2001 From: MrPandir Date: Tue, 13 Feb 2024 11:14:28 +0100 Subject: [PATCH 2/6] feat: replace fastAPI with litestar --- docker/Dockerfile | 2 +- http_exceptions.py | 44 ++++++++++++++++++++++ main.py | 15 -------- openapi_examples.py | 45 ++++++++-------------- openapi_responses.py | 47 ----------------------- requirements.txt | 4 +- server.py | 89 +++++++++++++++++++++++++++----------------- 7 files changed, 117 insertions(+), 129 deletions(-) create mode 100644 http_exceptions.py delete mode 100644 main.py delete mode 100644 openapi_responses.py diff --git a/docker/Dockerfile b/docker/Dockerfile index 6dee8ab..3a819ab 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,4 +26,4 @@ COPY --from=models-installer /app/models /app/models COPY --from=pip-installer /root/.local /root/.local ENV PATH=/root/.local/bin:$PATH -CMD ["uvicorn", "main:app", "--host", "0.0.0.0", 
"--port", "8000"] +CMD ["litestar", "--app", "server:app", "run", "--host", "0.0.0.0", "--port", "8000"] diff --git a/http_exceptions.py b/http_exceptions.py new file mode 100644 index 0000000..86b7c21 --- /dev/null +++ b/http_exceptions.py @@ -0,0 +1,44 @@ +from typing import Any + +from litestar.exceptions import HTTPException +from litestar import status_codes as status + + +class BaseHTTPException(HTTPException): + headers = {"Content-Type": "application/json"} + + def __init__(self, extra: dict[str, Any] = None) -> None: + super().__init__( + detail=self.detail, + status_code=self.status_code, + headers=self.headers, + extra=extra, + ) + + +class NotFoundSpeakerHTTPException(BaseHTTPException): + status_code = status.HTTP_404_NOT_FOUND + detail = "Speaker not found" + + +class NotCorrectTextHTTPException(BaseHTTPException): + status_code = status.HTTP_422_UNPROCESSABLE_ENTITY + detail = "Text is not correct" + + +class TextTooLongHTTPException(BaseHTTPException): + status_code = status.HTTP_413_REQUEST_ENTITY_TOO_LARGE + detail = "Text too long" + + +class InvalidSampleRateHTTPException(BaseHTTPException): + status_code = status.HTTP_400_BAD_REQUEST + detail = "Invalid sample rate" + + +genetate_exceptions = [ + NotFoundSpeakerHTTPException, + NotCorrectTextHTTPException, + TextTooLongHTTPException, + InvalidSampleRateHTTPException, +] diff --git a/main.py b/main.py deleted file mode 100644 index c1e92cc..0000000 --- a/main.py +++ /dev/null @@ -1,15 +0,0 @@ -from dotenv import load_dotenv - - -def run_server(): - import uvicorn - - uvicorn.run("server:app", host="localhost", port=8000) - -load_dotenv() - -if __name__ == "__main__": - run_server() -else: - # this is needed to start the server via the uvicorn command - from server import app diff --git a/openapi_examples.py b/openapi_examples.py index ff41e95..fbfdd4e 100644 --- a/openapi_examples.py +++ b/openapi_examples.py @@ -1,32 +1,19 @@ -from fastapi import Query +from litestar.openapi.spec import Example 
-TextExamples = Query( - openapi_examples={ - "ru_1": { - "value": "Съешьте ещё этих мягких французских булочек, да выпейте чаю." - }, - "ru_2": { - "value": "В недрах тундры выдры в гетрах тырят в вёдра ядра кедров." - }, - "en_1": { - "value": "Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?" - }, - } -) +text_examples = [ + Example("ru_1", value="Съешьте ещё этих мягких французских булочек, да выпейте чаю."), + Example("ru_2", value="В недрах тундры выдры в гетрах тырят в вёдра ядра кедров."), + Example("en_1", value="Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?"), +] -SpeakerExamples = Query( - openapi_examples={ - "ru_aidar": {"value": "aidar"}, - "ru_baya": {"value": "baya"}, - "en_0": {"value": "en_0"}, - } -) +speaker_examples = [ + Example("ru_aidar", value="aidar"), + Example("ru_baya", value="baya"), + Example("en_0", value="en_0"), +] -SampleRateExamples = Query( - openapi_examples={ - "8 000": {"value": 8_000}, - "24 000": {"value": 24_000}, - "48 000": {"value": 48_000}, - }, - description="Sample rate in Hz", -) +sample_rate_examples = [ + Example("8 000", value=8_000), + Example("24 000", value=24_000), + Example("48 000", value=48_000), +] diff --git a/openapi_responses.py b/openapi_responses.py deleted file mode 100644 index 59b5919..0000000 --- a/openapi_responses.py +++ /dev/null @@ -1,47 +0,0 @@ -GENERATE_RESPONSES = { - 200: {"content": {"audio/wav": {}}}, - 404: { - "content": { - "application/json": { - "example": { - "status_code": 404, - "detail": "Model not found for speaker: {speaker_name}", - } - } - }, - "description": "Model not found for speaker", - }, - 400: { - "content": { - "application/json": { - "example": { - "status_code": 400, - "detail": "Invalid sample rate: {sample_rate}. 
Use 8 000, 24 000 or 48 000", - } - } - }, - "description": "Invalid sample rate", - }, - 422: { - "content": { - "application/json": { - "example": { - "status_code": 422, - "detail": "Text not correct: {text}", - } - } - }, - "description": "Text not correct", - }, - 413: { - "content": { - "application/json": { - "example": { - "detail": "Text too long. Length is {len(text)}. Max length is 930 symbols.", - } - } - }, - "description": "Text too long", - }, -} - diff --git a/requirements.txt b/requirements.txt index dc62cbc..05fec9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -fastapi==0.109.0 -uvicorn==0.25.0 +litestar==2.5.5 +uvicorn==0.27.1 --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.0 diff --git a/server.py b/server.py index a1b7e20..790d32e 100644 --- a/server.py +++ b/server.py @@ -1,51 +1,70 @@ -from typing import Annotated from os import environ +from typing import Annotated -from fastapi import FastAPI, Response, HTTPException, status +from dotenv import load_dotenv +from litestar import Litestar, get, Response +from litestar.openapi import OpenAPIConfig +from litestar.config.response_cache import CACHE_FOREVER +from litestar.params import Parameter from tts import tts -from openapi_examples import TextExamples, SpeakerExamples, SampleRateExamples -from openapi_responses import GENERATE_RESPONSES -from exceptions import NotFoundModelException, NotCorrectTextException, TextTooLongException - -app = FastAPI() - -MAX_TEXT_LENGTH = 930 -text_length_limit = min(int(environ.get("TEXT_LENGTH_LIMIT", MAX_TEXT_LENGTH)), MAX_TEXT_LENGTH) -class TextTooLongHTTPException(HTTPException): - def __init__(self, text: str): - super().__init__( - status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, - detail=f"Text too long. Length is {len(text)}. 
Max length is {text_length_limit}.", - ) +from openapi_examples import * +from http_exceptions import * +from exceptions import * + + +load_dotenv() + +SILERO_MAX_TEXT_LENGTH = 930 +text_length_limit = min( + int(environ.get("TEXT_LENGTH_LIMIT", SILERO_MAX_TEXT_LENGTH)), + SILERO_MAX_TEXT_LENGTH, +) -@app.get("/generate", responses=GENERATE_RESPONSES) +@get( + "/generate", + summary="Generate WAV audio from text", + media_type="audio/wav", + sync_to_thread=True, + raises=genetate_exceptions, +) def generate( - text: Annotated[str, TextExamples], - speaker: Annotated[str, SpeakerExamples], - sample_rate: Annotated[int, SampleRateExamples] = 48_000, -): - if sample_rate not in (8_000, 24_000, 48_000): - raise HTTPException( - status.HTTP_400_BAD_REQUEST, - detail=f"Invalid sample rate: {sample_rate}. Use 8 000, 24 000 or 48 000", - ) + text: Annotated[str, Parameter(examples=text_examples)], + speaker: Annotated[str, Parameter(examples=speaker_examples)], + sample_rate: Annotated[ + int, Parameter(examples=sample_rate_examples, default=48_000) + ], +) -> Response: if len(text) > text_length_limit: - raise TextTooLongHTTPException(text) + raise TextTooLongHTTPException( + {"text": text, "length": len(text), "max_length": text_length_limit} + ) try: audio = tts.generate(text, speaker, sample_rate) - except NotFoundModelException as error: - return HTTPException(status.HTTP_404_NOT_FOUND, detail=str(error)) - except NotCorrectTextException as error: - return HTTPException(status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(error)) - except TextTooLongException as error: - return TextTooLongHTTPException(text) + except NotFoundModelException: + raise NotFoundSpeakerHTTPException({"speaker": speaker}) + except NotCorrectTextException: + raise NotCorrectTextHTTPException({"text": text}) + except TextTooLongException: + raise TextTooLongHTTPException( + {"text": text, "length": len(text), "max_length": text_length_limit} + ) + except InvalidSampleRateException: + raise 
InvalidSampleRateHTTPException( + {"sample_rate": sample_rate, "valid_sample_rates": tts.VALID_SAMPLE_RATES} + ) else: return Response(audio, media_type="audio/wav") -@app.get("/speakers") -def speakers(): +@get("/speakers", summary="List available speakers", cache=CACHE_FOREVER) +async def speakers() -> dict[str, list[str]]: return tts.speakers + + +app = Litestar( + [generate, speakers], + openapi_config=OpenAPIConfig(title="Silero TTS API", version="1.0.0"), +) From 1fc4555df42bfdf809d238fe32f3df34b7a4e157 Mon Sep 17 00:00:00 2001 From: MrPandir Date: Tue, 13 Feb 2024 11:20:40 +0100 Subject: [PATCH 3/6] chore: rename server.py to app.py --- server.py => app.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename server.py => app.py (100%) diff --git a/server.py b/app.py similarity index 100% rename from server.py rename to app.py From 3334d212fa0affd0b67afd89b40d6386eb9f082f Mon Sep 17 00:00:00 2001 From: MrPandir Date: Tue, 13 Feb 2024 11:46:14 +0100 Subject: [PATCH 4/6] chore: update README --- .github/README.md | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/.github/README.md b/.github/README.md index c46d69d..41ad413 100644 --- a/.github/README.md +++ b/.github/README.md @@ -1,7 +1,7 @@ ![Header](./header.png) -## Languages supported +# Languages supported > [!NOTE] > All models are from the repository: [snakers4/silero-models](https://github.com/snakers4/silero-models) @@ -46,10 +46,10 @@ All languages support sample rate: 8 000, 24 000, 48 000 # Run API server ```bash -python3 main.py +litestar run ``` > [!NOTE] -> The default will be [localhost:8000](http://localhost:8000/docs). 
All endpoints can be viewed and tested at [localhost:8000/docs](http://localhost:8000/docs) +> The default will be [localhost:8000](http://localhost:8000/) # Run API server via docker ```bash @@ -61,7 +61,7 @@ docker run --rm -p 8000:8000 twirapp/silero-tts-api-server Build the API server image: ```bash -docker build --rm -f docker/Dockerfile -t silero-tts-api-server . +docker build -f docker/Dockerfile -t silero-tts-api-server . ``` Run the API server container: @@ -76,6 +76,23 @@ docker-compose -f docker/compose.yml up +# Documentation +You can view the automatically generated documentation based on OpenAPI at: + +| Provider | Url | +|--------|--------| +| [ReDoc](https://redocly.com/redoc) | http://localhost:8000/schema | +| [Swagger UI](https://swagger.io) | http://localhost:8000/schema/swagger | +| [Stoplight Elements](https://stoplight-site.webflow.io/open-source/elements) | http://localhost:8000/schema/elements | +| [RapiDoc](https://rapidocweb.com) | http://localhost:8000/schema/rapidoc | +| OpenAPI schema yaml | http://localhost:8000/schema/openapi.yaml | +| OpenAPI schema json | http://localhost:8000/schema/openapi.json | + +# Endpoints + +- GET /generate - Generate audio from text +- GET /speakers - Get list of speakers + # Considerations for the future This repository is dedicated to twir.app and is designed to meet its requirements.
From 6a1cad95785f2ccc464072a9cd71dab119cd4c42 Mon Sep 17 00:00:00 2001 From: MrPandir Date: Tue, 13 Feb 2024 12:12:39 +0100 Subject: [PATCH 5/6] fix: run command in dockerfile --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3a819ab..55910ae 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,4 +26,4 @@ COPY --from=models-installer /app/models /app/models COPY --from=pip-installer /root/.local /root/.local ENV PATH=/root/.local/bin:$PATH -CMD ["litestar", "--app", "server:app", "run", "--host", "0.0.0.0", "--port", "8000"] +CMD ["litestar", "run", "--host", "0.0.0.0", "--port", "8000"] From e7e8512bd1b6acf38e27423381f10fcc6ec73dbd Mon Sep 17 00:00:00 2001 From: MrPandir <137798474+MrPandir@users.noreply.github.com> Date: Wed, 14 Feb 2024 10:17:43 +0100 Subject: [PATCH 6/6] chore: update README.md --- .github/README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/README.md b/.github/README.md index 41ad413..ab57e3f 100644 --- a/.github/README.md +++ b/.github/README.md @@ -90,8 +90,13 @@ You can view the automatically generated documentation based on OpenAPI at: # Endpoints -- GET /generate - Generate audio from text -- GET /speakers - Get list of speakers +- `GET` `/generate` - Generate audio in wav format from text +- `GET` `/speakers` - Get list of speakers + +# Environment variables: + +- `TEXT_LENGTH_LIMIT` - Maximum length of the text to be processed. Default is 930 characters. +- `MKL_NUM_THREADS` - Number of threads to use for generating audio. Default number of threads: number of CPU cores. # Considerations for the future This repository is dedicated to twir.app and is designed to meet its requirements.