From e0c56bf43b1a4ecfb021573923217dc89aafce6a Mon Sep 17 00:00:00 2001 From: BentiGorlich Date: Mon, 2 Oct 2023 18:37:46 +0200 Subject: [PATCH] Increase stability - save cache directly after each default string was fetched - add `OnDone` callback to `SynthesisListener` for that - add option to trigger a save cache for that as well - fix problems with the buffer size in `Mimic3TTSEngineWeb` while synthesizing text. Use `TextToSpeech.getMaxSpeechInputLength()` to determine the maximum buffer size supported - only make audio available after the fetch is completed. The previous behavior may have introduced white noise on slow connections (when the bytes arrive more slowly than the audio playback consumes them) - trim the text input when making the request to the web server --- .../activities/MainActivity.java | 3 +- .../tts/Mimic3TTSEngineWeb.java | 37 +++++++++++-------- .../tts/SynthesisListener.java | 10 +++++ 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/activities/MainActivity.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/activities/MainActivity.java index 66d6545..d54d36d 100644 --- a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/activities/MainActivity.java +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/activities/MainActivity.java @@ -217,7 +217,8 @@ private void synthesizeDefaultStrings() { if(SelectedSpeaker != null && !SelectedSpeaker.equals("")) voice += "#" + SelectedSpeaker; for(Map.Entry s : defaultStrings.entrySet()) { - Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(s.getValue(), voice, SpeechSpeed, new SynthesisListener(false), s.getKey()); + SynthesisListener listener = new SynthesisListener(false, () -> Mimic3TTSEngineWeb.s_RunningService.triggerSaveCache()); + Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(s.getValue(), voice, SpeechSpeed, listener, s.getKey()); } } } diff --git 
a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/Mimic3TTSEngineWeb.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/Mimic3TTSEngineWeb.java index 8d9e3de..c03adee 100644 --- a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/Mimic3TTSEngineWeb.java +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/Mimic3TTSEngineWeb.java @@ -59,7 +59,6 @@ public class Mimic3TTSEngineWeb extends TextToSpeechService { - public interface OnVoicesLoadedListener { void onVoicesLoaded(List voices); } @@ -95,7 +94,8 @@ public KVP(K key, V value) { private final Logger _Logger; private Thread T; private boolean Running; - private boolean FetchVoices = false; + private boolean FetchVoices = true; + private boolean SaveCache = false; private long MaxCacheSizeInB = 2L * 1024 * 1024 * 1024; private float MaxCacheSizeInGB = 2; @@ -182,7 +182,6 @@ public int onStartCommand(Intent intent, int flags, int startId) { _Logger.info("got action: " + intent.getAction()); s_RunningService = this; Running = true; - FetchVoices = true; if(intent != null) { String address = intent.getStringExtra("server_address"); if (address != null && !address.equals("")) @@ -230,6 +229,9 @@ private void main() { } else if (SynthesisRequest) { synthesizeText(SynthesisText, SynthesisVoice, SynthesisSpeechRate, Callback, SynthesisSpecialKey); SynthesisRequest = false; + } else if (SaveCache) { + saveCache(); + SaveCache = false; } else { try { Thread.sleep(100); @@ -438,7 +440,7 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate URL url = new URL(urlString); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); try { - byte[] outputBuffer = text.getBytes(StandardCharsets.UTF_8); + byte[] outputBuffer = text.trim().getBytes(StandardCharsets.UTF_8); conn.setRequestMethod("POST"); conn.setDoOutput(true); conn.setFixedLengthStreamingMode(outputBuffer.length); @@ -450,18 +452,27 @@ private void 
synthesizeTextFromUrl(SharedPreferences preferences, int speechRate String message = conn.getResponseMessage(); InputStream in = new BufferedInputStream(conn.getInputStream()); int nRead; - byte[] data = new byte[16384]; + int ttsMaxLength = TextToSpeech.getMaxSpeechInputLength(); + byte[] data = new byte[ttsMaxLength]; ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream(); _Logger.info("Got raw data"); while ((nRead = in.read(data, 0, data.length)) != -1) { - synthesisCallback.audioAvailable(data, 0, nRead); byteBuffer.write(data, 0, nRead); } in.close(); + + byte[] completeData = byteBuffer.toByteArray(); _Logger.info("Got audio"); + for (int i = 0; i