Increase stability

- save the cache directly after each default string has been fetched
- add an `OnDone` callback to `SynthesisListener` for that
- add an option to trigger a cache save for that as well
- fix problems with the buffer size in `Mimic3TTSEngineWeb` while synthesizing text: use `TextToSpeech.getMaxSpeechInputLength()` to determine the maximum supported buffer size
- only make audio available after the fetch has completed. The previous behavior may have introduced white noise on slow connections (when the bytes arrive more slowly than the audio playback consumes them)
- trim the text input when making the request to the web server
BentiGorlich · Oct 2, 2023 · e0c56bf · e0c56bf
1 parent 1185da1
commit e0c56bf
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 16 deletions.
diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/activities/MainActivity.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/activities/MainActivity.java
@@ -217,7 +217,8 @@ private void synthesizeDefaultStrings() {
             if(SelectedSpeaker != null && !SelectedSpeaker.equals(""))
                 voice += "#" + SelectedSpeaker;
             for(Map.Entry<String, String> s : defaultStrings.entrySet()) {
-                Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(s.getValue(), voice, SpeechSpeed, new SynthesisListener(false), s.getKey());
+                SynthesisListener listener = new SynthesisListener(false, () -> Mimic3TTSEngineWeb.s_RunningService.triggerSaveCache());
+                Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(s.getValue(), voice, SpeechSpeed, listener, s.getKey());
             }
         }
     }

diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/Mimic3TTSEngineWeb.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/Mimic3TTSEngineWeb.java
@@ -59,7 +59,6 @@
 
 public class Mimic3TTSEngineWeb extends TextToSpeechService {
 
-
     public interface OnVoicesLoadedListener {
         void onVoicesLoaded(List<MimicVoice> voices);
     }
@@ -95,7 +94,8 @@ public KVP(K key, V value) {
     private final Logger _Logger;
     private Thread T;
     private boolean Running;
-    private boolean FetchVoices = false;
+    private boolean FetchVoices = true;
+    private boolean SaveCache = false;
 
     private long MaxCacheSizeInB = 2L * 1024 * 1024 * 1024;
     private float MaxCacheSizeInGB = 2;
@@ -182,7 +182,6 @@ public int onStartCommand(Intent intent, int flags, int startId) {
             _Logger.info("got action: " + intent.getAction());
         s_RunningService = this;
         Running = true;
-        FetchVoices = true;
         if(intent != null) {
             String address = intent.getStringExtra("server_address");
             if (address != null && !address.equals(""))
@@ -230,6 +229,9 @@ private void main() {
                 } else if (SynthesisRequest) {
                     synthesizeText(SynthesisText, SynthesisVoice, SynthesisSpeechRate, Callback, SynthesisSpecialKey);
                     SynthesisRequest = false;
+                } else if (SaveCache) {
+                    saveCache();
+                    SaveCache = false;
                 } else {
                     try {
                         Thread.sleep(100);
@@ -438,7 +440,7 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
             URL url = new URL(urlString);
             HttpURLConnection conn = (HttpURLConnection) url.openConnection();
             try {
-                byte[] outputBuffer = text.getBytes(StandardCharsets.UTF_8);
+                byte[] outputBuffer = text.trim().getBytes(StandardCharsets.UTF_8);
                 conn.setRequestMethod("POST");
                 conn.setDoOutput(true);
                 conn.setFixedLengthStreamingMode(outputBuffer.length);
@@ -450,18 +452,27 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
                 String message = conn.getResponseMessage();
                 InputStream in = new BufferedInputStream(conn.getInputStream());
                 int nRead;
-                byte[] data = new byte[16384];
+                int ttsMaxLength = TextToSpeech.getMaxSpeechInputLength();
+                byte[] data = new byte[ttsMaxLength];
                 ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream();
 
                 _Logger.info("Got raw data");
                 while ((nRead = in.read(data, 0, data.length)) != -1) {
-                    synthesisCallback.audioAvailable(data, 0, nRead);
                     byteBuffer.write(data, 0, nRead);
                 }
                 in.close();
+
+                byte[] completeData = byteBuffer.toByteArray();
                 _Logger.info("Got audio");
+                for (int i = 0; i<completeData.length/ttsMaxLength; i++) {
+                    int start = i * ttsMaxLength;
+                    int end = start + ttsMaxLength;
+                    if(completeData.length < end)
+                        end = completeData.length;
+                    synthesisCallback.audioAvailable(completeData, start, end - start);
+                }
+
                 synthesisCallback.done();
-                byte[] completeData = byteBuffer.toByteArray();
                 CacheEntry cacheEntry = new CacheEntry();
                 cacheEntry.Text = text;
                 cacheEntry.ByteSize = completeData.length;
@@ -471,16 +482,10 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
             }
         } catch (MalformedURLException ex) {
             _Logger.severe("Malformed server url: " + ex.getMessage());
-            ex.printStackTrace();
             synthesisCallback.error();
             synthesisCallback.done();
         } catch (IOException ex) {
             _Logger.severe("Connection error: " + ex.getMessage());
-            ex.printStackTrace();
-            for (StackTraceElement el : ex.getStackTrace()) {
-                _Logger.warning("at " + el.toString());
-            }
-
             if(SpecialCache.containsKey("default_no_connection")) {
                 CacheEntry noConn = SpecialCache.get("default_no_connection");
                 File noConnFile = new File(Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(), "default_no_connection");
@@ -489,7 +494,7 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
                         InputStream in = Files.newInputStream(noConnFile.toPath());
                         ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream();
                         int nRead;
-                        byte[] data = new byte[16384];
+                        byte[] data = new byte[TextToSpeech.getMaxSpeechInputLength()];
                         _Logger.info("Got raw data");
                         while ((nRead = in.read(data, 0, data.length)) != -1) {
                             synthesisCallback.audioAvailable(data, 0, nRead);
@@ -544,7 +549,7 @@ private void synthesizeTextFromCache(boolean specialKeySet, String specialKey, S
         try {
             InputStream in = new BufferedInputStream(Files.newInputStream(cacheFile.toPath()));
             int nRead;
-            byte[] data = new byte[16384];
+            byte[] data = new byte[TextToSpeech.getMaxSpeechInputLength()];
 
             _Logger.info("Got raw data");
             while ((nRead = in.read(data, 0, data.length)) != -1) {
@@ -818,6 +823,8 @@ public void triggerLoadVoices() {
         FetchVoices = true;
     }
 
+    public void triggerSaveCache() { SaveCache = true; }
+
     public void clearCache(boolean clearSpecialCacheToo) {
         if(clearSpecialCacheToo)
             clearSpecialCache();

diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/SynthesisListener.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/tts/SynthesisListener.java
@@ -21,13 +21,19 @@ public class SynthesisListener implements SynthesisCallback {
     int ChannelCount;
     ByteArrayOutputStream AudioBuffer = new ByteArrayOutputStream();
     Logger _Logger;
+    Runnable OnDone = null;
 
     public SynthesisListener(boolean playOnFinish) {
         _Logger = Logger.getLogger(this.getClass().toString());
         LogManager.getLogManager().addLogger(_Logger);
         PlayOnFinish = playOnFinish;
     }
 
+    public SynthesisListener(boolean playOnFinish, Runnable onDone) {
+        this(playOnFinish);
+        OnDone = onDone;
+    }
+
     @Override
     public int getMaxBufferSize() {
         return Integer.MAX_VALUE;
@@ -56,6 +62,10 @@ public int audioAvailable(byte[] buffer, int offset, int length) {
 
     @Override
     public int done() {
+        if(OnDone != null) {
+            OnDone.run();
+        }
+
         if(!PlayOnFinish) {
             _Logger.info("Synthesis done");
             return 0;