From 489c6c19596e1382d79b602c23d84f3785c3a65b Mon Sep 17 00:00:00 2001
From: Michael Hansen <michael.hansen.24@us.af.mil>
Date: Wed, 21 Apr 2021 16:25:14 -0400
Subject: [PATCH] Allow <voice>;<vocoder> format for MaryTTS voice

---
 README.md        | 10 ++++++++++
 larynx/server.py |  7 +++++--
 2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 3b9b90b..76f00d3 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,16 @@ $ docker run -it -p 59125:5002 rhasspy/larynx:<LANG>
 
 The `/process` HTTP endpoint should now work for voices formatted as `<LANG>/<VOICE>` such as `en-us/harvard-glow_tts`.
 
+You can select the vocoder by appending `;<VOCODER>` to the MaryTTS voice name (the `VOICE` parameter of `/process`).
+
+For example: `en-us/harvard-glow_tts;hifi_gan:vctk_small` will use the lowest quality (but fastest) vocoder. This is usually necessary to get decent performance on a Raspberry Pi.
+
+Available vocoders are:
+
+* `hifi_gan:universal_large` (best quality, slowest, default)
+* `hifi_gan:vctk_medium` (medium quality)
+* `hifi_gan:vctk_small` (lowest quality, fastest)
+
 ## Python Installation
 
 ```sh
diff --git a/larynx/server.py b/larynx/server.py
index 5f2f1bc..b271ea8 100755
--- a/larynx/server.py
+++ b/larynx/server.py
@@ -15,7 +15,6 @@
 from urllib.parse import parse_qs
 from uuid import uuid4
 
-import gruut_ipa
 import hypercorn
 import numpy as np
 import quart_cors
@@ -411,7 +410,11 @@ async def api_process():
         text = request.args.get("INPUT_TEXT", "")
         voice = request.args.get("VOICE", "")
 
-    wav_bytes = await text_to_wav(text, voice, vocoder=_DEFAULT_VOCODER)
+    # <VOICE>;<VOCODER>
+    voice, _, vocoder = voice.partition(";")
+    vocoder = vocoder or _DEFAULT_VOCODER
+
+    wav_bytes = await text_to_wav(text, voice, vocoder=vocoder)
 
     return Response(wav_bytes, mimetype="audio/wav")