From 489c6c19596e1382d79b602c23d84f3785c3a65b Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Wed, 21 Apr 2021 16:25:14 -0400 Subject: [PATCH] Allow <VOICE>;<VOCODER> format for MaryTTS voice --- README.md | 10 ++++++++++ larynx/server.py | 7 +++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3b9b90b..76f00d3 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,16 @@ $ docker run -it -p 59125:5002 rhasspy/larynx:<LANG> The `/process` HTTP endpoint should now work for voices formatted as `<LANG>/<VOICE>` such as `en-us/harvard-glow_tts`. +You can specify the vocoder by adding `;<VOCODER>` to the MaryTTS voice. + +For example: `en-us/harvard-glow_tts;hifi_gan:vctk_small` will use the lowest quality (but fastest) vocoder. This is usually necessary to get decent performance on a Raspberry Pi. + +Available vocoders are: + +* `hifi_gan:universal_large` (best quality, slowest, default) +* `hifi_gan:vctk_medium` (medium quality) +* `hifi_gan:vctk_small` (lowest quality, fastest) + ## Python Installation ```sh diff --git a/larynx/server.py b/larynx/server.py index 5f2f1bc..b271ea8 100755 --- a/larynx/server.py +++ b/larynx/server.py @@ -15,7 +15,6 @@ from urllib.parse import parse_qs from uuid import uuid4 -import gruut_ipa import hypercorn import numpy as np import quart_cors @@ -411,7 +410,11 @@ async def api_process(): text = request.args.get("INPUT_TEXT", "") voice = request.args.get("VOICE", "") - wav_bytes = await text_to_wav(text, voice, vocoder=_DEFAULT_VOCODER) + # ; + voice, vocoder = voice.split(";", maxsplit=1) + vocoder = vocoder or _DEFAULT_VOCODER + + wav_bytes = await text_to_wav(text, voice, vocoder=vocoder) return Response(wav_bytes, mimetype="audio/wav")