Skip to content

Commit

Permalink
Increase stability
Browse files Browse the repository at this point in the history
- save cache directly after each default string was fetched
- add `OnDone` callback to `SynthesisListener` for that
- add option to trigger a save cache for that as well
- fix problems with the buffer size in `Mimic3TTSEngineWeb` while synthesizing text. Use `TextToSpeech.getMaxSpeechInputLength()` to determine the maximum buffer size supported
- only make audio available after the fetch is completed. Forwarding audio while it was still being fetched may have introduced white noise on slow connections (when the bytes arrive slower than the bitrate of the audio playback)
- trim the text input when making the request to the web server
  • Loading branch information
BentiGorlich committed Oct 2, 2023
1 parent 1185da1 commit e0c56bf
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ private void synthesizeDefaultStrings() {
if(SelectedSpeaker != null && !SelectedSpeaker.equals(""))
voice += "#" + SelectedSpeaker;
for(Map.Entry<String, String> s : defaultStrings.entrySet()) {
Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(s.getValue(), voice, SpeechSpeed, new SynthesisListener(false), s.getKey());
SynthesisListener listener = new SynthesisListener(false, () -> Mimic3TTSEngineWeb.s_RunningService.triggerSaveCache());
Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(s.getValue(), voice, SpeechSpeed, listener, s.getKey());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@

public class Mimic3TTSEngineWeb extends TextToSpeechService {


/**
 * Listener that receives a list of {@link MimicVoice} objects.
 * NOTE(review): presumably invoked once the voice list has been fetched from the
 * server; the call site is not visible here, so confirm against the caller.
 */
public interface OnVoicesLoadedListener {
/**
 * Receives the loaded voices.
 *
 * @param voices the voices handed to the listener
 */
void onVoicesLoaded(List<MimicVoice> voices);
}
Expand Down Expand Up @@ -95,7 +94,8 @@ public KVP(K key, V value) {
private final Logger _Logger;
private Thread T;
private boolean Running;
private boolean FetchVoices = false;
private boolean FetchVoices = true;
private boolean SaveCache = false;

private long MaxCacheSizeInB = 2L * 1024 * 1024 * 1024;
private float MaxCacheSizeInGB = 2;
Expand Down Expand Up @@ -182,7 +182,6 @@ public int onStartCommand(Intent intent, int flags, int startId) {
_Logger.info("got action: " + intent.getAction());
s_RunningService = this;
Running = true;
FetchVoices = true;
if(intent != null) {
String address = intent.getStringExtra("server_address");
if (address != null && !address.equals(""))
Expand Down Expand Up @@ -230,6 +229,9 @@ private void main() {
} else if (SynthesisRequest) {
synthesizeText(SynthesisText, SynthesisVoice, SynthesisSpeechRate, Callback, SynthesisSpecialKey);
SynthesisRequest = false;
} else if (SaveCache) {
saveCache();
SaveCache = false;
} else {
try {
Thread.sleep(100);
Expand Down Expand Up @@ -438,7 +440,7 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
URL url = new URL(urlString);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
try {
byte[] outputBuffer = text.getBytes(StandardCharsets.UTF_8);
byte[] outputBuffer = text.trim().getBytes(StandardCharsets.UTF_8);
conn.setRequestMethod("POST");
conn.setDoOutput(true);
conn.setFixedLengthStreamingMode(outputBuffer.length);
Expand All @@ -450,18 +452,27 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
String message = conn.getResponseMessage();
InputStream in = new BufferedInputStream(conn.getInputStream());
int nRead;
byte[] data = new byte[16384];
int ttsMaxLength = TextToSpeech.getMaxSpeechInputLength();
byte[] data = new byte[ttsMaxLength];
ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream();

_Logger.info("Got raw data");
while ((nRead = in.read(data, 0, data.length)) != -1) {
synthesisCallback.audioAvailable(data, 0, nRead);
byteBuffer.write(data, 0, nRead);
}
in.close();

byte[] completeData = byteBuffer.toByteArray();
_Logger.info("Got audio");
// Hand the completely fetched audio to the callback in chunks of at most ttsMaxLength bytes.
// Advancing by offset (instead of iterating completeData.length / ttsMaxLength times) makes sure
// the trailing partial chunk is delivered as well; with the integer division, the remainder was
// dropped, and audio shorter than one chunk produced no output at all.
// NOTE(review): SynthesisCallback.getMaxBufferSize() is the documented upper bound for a single
// audioAvailable() call, while ttsMaxLength is the maximum *text* input length - confirm that
// this is the intended chunk size.
for (int start = 0; start < completeData.length; start += ttsMaxLength) {
    int length = Math.min(ttsMaxLength, completeData.length - start);
    synthesisCallback.audioAvailable(completeData, start, length);
}

synthesisCallback.done();
byte[] completeData = byteBuffer.toByteArray();
CacheEntry cacheEntry = new CacheEntry();
cacheEntry.Text = text;
cacheEntry.ByteSize = completeData.length;
Expand All @@ -471,16 +482,10 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
}
} catch (MalformedURLException ex) {
_Logger.severe("Malformed server url: " + ex.getMessage());
ex.printStackTrace();
synthesisCallback.error();
synthesisCallback.done();
} catch (IOException ex) {
_Logger.severe("Connection error: " + ex.getMessage());
ex.printStackTrace();
for (StackTraceElement el : ex.getStackTrace()) {
_Logger.warning("at " + el.toString());
}

if(SpecialCache.containsKey("default_no_connection")) {
CacheEntry noConn = SpecialCache.get("default_no_connection");
File noConnFile = new File(Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(), "default_no_connection");
Expand All @@ -489,7 +494,7 @@ private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate
InputStream in = Files.newInputStream(noConnFile.toPath());
ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream();
int nRead;
byte[] data = new byte[16384];
byte[] data = new byte[TextToSpeech.getMaxSpeechInputLength()];
_Logger.info("Got raw data");
while ((nRead = in.read(data, 0, data.length)) != -1) {
synthesisCallback.audioAvailable(data, 0, nRead);
Expand Down Expand Up @@ -544,7 +549,7 @@ private void synthesizeTextFromCache(boolean specialKeySet, String specialKey, S
try {
InputStream in = new BufferedInputStream(Files.newInputStream(cacheFile.toPath()));
int nRead;
byte[] data = new byte[16384];
byte[] data = new byte[TextToSpeech.getMaxSpeechInputLength()];

_Logger.info("Got raw data");
while ((nRead = in.read(data, 0, data.length)) != -1) {
Expand Down Expand Up @@ -818,6 +823,8 @@ public void triggerLoadVoices() {
FetchVoices = true;
}

/**
 * Requests a cache save by raising the {@code SaveCache} flag.
 * This method only sets the flag; it does not perform the save itself.
 */
public void triggerSaveCache() {
    this.SaveCache = true;
}

public void clearCache(boolean clearSpecialCacheToo) {
if(clearSpecialCacheToo)
clearSpecialCache();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,19 @@ public class SynthesisListener implements SynthesisCallback {
int ChannelCount;
ByteArrayOutputStream AudioBuffer = new ByteArrayOutputStream();
Logger _Logger;
Runnable OnDone = null;

public SynthesisListener(boolean playOnFinish) {
_Logger = Logger.getLogger(this.getClass().toString());
LogManager.getLogManager().addLogger(_Logger);
PlayOnFinish = playOnFinish;
}

/**
 * Creates a listener that additionally carries a completion hook.
 *
 * @param playOnFinish forwarded unchanged to {@link #SynthesisListener(boolean)}
 * @param onDone       stored in {@code OnDone}; may be {@code null}, in which case no hook is set
 */
public SynthesisListener(boolean playOnFinish, Runnable onDone) {
    // Delegate the common setup (logger, PlayOnFinish) to the single-argument constructor.
    this(playOnFinish);
    this.OnDone = onDone;
}

@Override
public int getMaxBufferSize() {
return Integer.MAX_VALUE;
Expand Down Expand Up @@ -56,6 +62,10 @@ public int audioAvailable(byte[] buffer, int offset, int length) {

@Override
public int done() {
if(OnDone != null) {
OnDone.run();
}

if(!PlayOnFinish) {
_Logger.info("Synthesis done");
return 0;
Expand Down

0 comments on commit e0c56bf

Please sign in to comment.