diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..aa724b7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..b22ee15 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +Mimic3 TTS Engine Wrapper \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..b589d56 --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/gradle.xml b/.idea/gradle.xml new file mode 100644 index 0000000..ae388c2 --- /dev/null +++ b/.idea/gradle.xml @@ -0,0 +1,20 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..8978d23 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,9 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ReadMe.md b/ReadMe.md new file mode 100644 index 0000000..2edd334 --- /dev/null +++ b/ReadMe.md @@ -0,0 +1,33 @@ +# Mimic3 TTS Service Wrapper For Android +This app is a wrapper around the mimic3 webserver. It registers itself to android, so you can use it system wide. +At the moment this app requires a running mimic3 server, but in the future it might be able to run locally. 
+The main obstacle to running it locally is that `onnxruntime`, which mimic3 depends on, is not supported +by the Gradle plugin `chaquopy`, which enables Android apps to use Python packages. See [#216](https://github.com/chaquo/chaquopy/issues/216) in the [chaquopy](https://github.com/chaquo/chaquopy) repo. + +![Mimic3 Logo](https://github.com/MycroftAI/mimic3/raw/master/img/Mimic_color.svg) [Mimic3](https://github.com/MycroftAI/mimic3) + +# Quickstart + +You need: +1. An Android phone running Android 8.0 (API level 26) or above +2. A machine running the mimic3 webserver +3. A router which can forward ports (or an exposed webserver) + +In the app you can set the address of your mimic3 server. In the future I may provide a default one. +There is a Docker image for the mimic3 webserver: `mycroftai/mimic3` [doc](https://mycroft-ai.gitbook.io/docs/mycroft-technologies/mimic-tts/mimic-3#docker-image) + +NOTE: You have to run the server behind a reverse proxy that serves HTTPS, because Android blocks cleartext HTTP traffic by default. +I use a Docker image for that: `jc21/nginx-proxy-manager` [doc](https://github.com/NginxProxyManager/nginx-proxy-manager) + +# Features + +- Uses Mimic3, an open-source, fast and good-quality TTS engine +- Mimic3 server can run on low-end hardware like the Raspberry Pi 4 +- Supports 25 languages with multiple voices and speakers, see [Mimic3 voices](https://github.com/MycroftAI/mimic3-voices) for more details +- Supports caching +- Settings for speech speed, audio volatility, phoneme volatility and cache size. 
An explanation of audio and phoneme volatility is available by clicking on their labels + +# Screenshots +| Main Screen | Settings Screen | +| - | - | +| ![Main Screen](images/Main.png) | ![Settings Screen](images/Settings.png) | diff --git a/app/.gitignore b/app/.gitignore new file mode 100644 index 0000000..42afabf --- /dev/null +++ b/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/app/build.gradle b/app/build.gradle new file mode 100644 index 0000000..8420dfb --- /dev/null +++ b/app/build.gradle @@ -0,0 +1,56 @@ +plugins { + id 'com.android.application' + //id 'com.chaquo.python' +} + +android { + namespace 'de.bentigorlich.mimic3ttsenginewrapper' + compileSdk 33 + + defaultConfig { + applicationId "de.bentigorlich.mimic3ttsenginewrapper" + minSdk 26 + targetSdk 33 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + + ndk { + abiFilters "armeabi-v7a", "arm64-v8a", "x86", "x86_64" + } + + //python { + // version "3.8" + // pip { + // install "mycroft-mimic3-tts[all]" + // } + //} + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility JavaVersion.VERSION_1_8 + targetCompatibility JavaVersion.VERSION_1_8 + } + buildFeatures { + viewBinding true + } +} + +dependencies { + + implementation 'androidx.appcompat:appcompat:1.6.1' + implementation 'com.google.android.material:material:1.9.0' + implementation 'androidx.constraintlayout:constraintlayout:2.1.4' + implementation 'androidx.preference:preference:1.2.1' + testImplementation 'junit:junit:4.13.2' + androidTestImplementation 'androidx.test.ext:junit:1.1.5' + androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1' + implementation 'com.google.code.gson:gson:2.10.1' +} \ No newline at end of file diff --git a/app/proguard-rules.pro b/app/proguard-rules.pro new file mode 
100644 index 0000000..481bb43 --- /dev/null +++ b/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/app/src/androidTest/java/de/bentigorlich/mimic3ttsenginewrapper/ExampleInstrumentedTest.java b/app/src/androidTest/java/de/bentigorlich/mimic3ttsenginewrapper/ExampleInstrumentedTest.java new file mode 100644 index 0000000..c311dec --- /dev/null +++ b/app/src/androidTest/java/de/bentigorlich/mimic3ttsenginewrapper/ExampleInstrumentedTest.java @@ -0,0 +1,26 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import android.content.Context; + +import androidx.test.platform.app.InstrumentationRegistry; +import androidx.test.ext.junit.runners.AndroidJUnit4; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import static org.junit.Assert.*; + +/** + * Instrumented test, which will execute on an Android device. + * + * @see Testing documentation + */ +@RunWith(AndroidJUnit4.class) +public class ExampleInstrumentedTest { + @Test + public void useAppContext() { + // Context of the app under test. 
+ Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext(); + assertEquals("de.bentigorlich.mimic3ttsenginewrapper", appContext.getPackageName()); // must equal the applicationId declared in app/build.gradle + } +} \ No newline at end of file diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000..00abada --- /dev/null +++ b/app/src/main/AndroidManifest.xml @@ -0,0 +1,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/CacheEntry.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/CacheEntry.java new file mode 100644 index 0000000..fe775be --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/CacheEntry.java @@ -0,0 +1,21 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import androidx.annotation.NonNull; + +import java.util.Date; + +public class CacheEntry implements Cloneable { + public String Text; + public Date LastUsed; + public long ByteSize; + + @NonNull + @Override + public CacheEntry clone() { + CacheEntry copy = new CacheEntry(); + copy.Text = Text; + copy.LastUsed = LastUsed; + copy.ByteSize = ByteSize; + return copy; + } +} diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/CheckTTSDataActivity.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/CheckTTSDataActivity.java new file mode 100644 index 0000000..90c5b4a --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/CheckTTSDataActivity.java @@ -0,0 +1,103 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import android.app.Activity; +import android.content.Intent; +import android.os.Bundle; +import android.speech.tts.TextToSpeech; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Optional; +import java.util.logging.LogManager; +import 
java.util.logging.Logger; + +public class 
CheckTTSDataActivity extends Activity { + + Logger _Logger; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + + _Logger = Logger.getLogger(this.getClass().toString()); + LogManager.getLogManager().addLogger(_Logger); + + String action = getIntent().getAction(); + if(action != null) { + if(action.equals(TextToSpeech.Engine.ACTION_GET_SAMPLE_TEXT)) { + getSampleTextForIntent(); + } else if (action.equals(TextToSpeech.Engine.ACTION_CHECK_TTS_DATA)) { + getCheckTTSDataForIntent(); + } else { + _Logger.warning("called with action that is not implemented: " + action); + } + } + } + + private void getSampleTextForIntent() { + if(Mimic3TTSEngineWeb.s_RunningService != null) { + final Intent intent = getIntent(); + final String language = intent.getStringExtra("language"); + final String country = intent.getStringExtra("country"); + final String variant = intent.getStringExtra("variant"); + String voiceName = Mimic3TTSEngineWeb.s_RunningService.onGetDefaultVoiceNameFor(language, country, variant); + List voices = Mimic3TTSEngineWeb.s_RunningService.getMimicVoices(); + Optional matchVoice = voices.stream().filter(voice -> voice.key.equals(voiceName)).findAny(); + int result; + String text = null; + if(matchVoice.isPresent()) { + MimicVoice v = matchVoice.get(); + text = v.sample_text; + result = TextToSpeech.LANG_AVAILABLE; + } else { + result = TextToSpeech.LANG_NOT_SUPPORTED; + } + + + final Intent returnData = new Intent(); + if(text != null) + returnData.putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text); + setResult(result, returnData); + finish(); + } else { + int result = TextToSpeech.LANG_NOT_SUPPORTED; + final Intent returnData = new Intent(); + setResult(result, returnData); + finish(); + } + } + + private void getCheckTTSDataForIntent() { + if(Mimic3TTSEngineWeb.s_RunningService != null) { + List voices = Mimic3TTSEngineWeb.s_RunningService.getMimicVoices(); + ArrayList availableVoices = new 
ArrayList<>(); + Locale[] availableLocales = Locale.getAvailableLocales(); + for(MimicVoice voice : voices) { + String[] languageParts = voice.language.replace("_", "-").split("-"); + String language = languageParts[0]; + String country = languageParts.length == 2 ? languageParts[1] : null; + Locale locale; + Locale.Builder builder = new Locale.Builder(); + builder.setLanguage(language); + if (country != null) + builder.setRegion(country); + locale = builder.build(); + if (Arrays.stream(availableLocales).anyMatch(l -> l.toString().equals(locale.toString()))) + availableVoices.add(locale.toString()); + } + final Intent returnData = new Intent(); + returnData.putStringArrayListExtra(TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES, availableVoices); + returnData.putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, new ArrayList<>()); + setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, returnData); + finish(); + } else { + final Intent returnData = new Intent(); + returnData.putStringArrayListExtra(TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES, new ArrayList<>()); + returnData.putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, new ArrayList<>()); + setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_FAIL, returnData); + finish(); + } + } +} \ No newline at end of file diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/MainActivity.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/MainActivity.java new file mode 100644 index 0000000..e6d10f0 --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/MainActivity.java @@ -0,0 +1,384 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import static de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_AUDIO_VOLATILITY; +import static de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_CACHE_ACTIVATE; +import static de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_LANGUAGE; +import static 
de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_PHONEME_VOLATILITY; +import static de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_SERVER_ADDRESS; +import static de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_SPEAKER; +import static de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_SPEED; +import static de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWrapperApp.PREF_VOICE; + +import androidx.annotation.NonNull; +import androidx.appcompat.app.AppCompatActivity; +import androidx.preference.PreferenceManager; + +import android.app.AlertDialog; +import android.content.Context; +import android.content.Intent; +import android.content.SharedPreferences; +import android.content.res.Resources; +import android.os.Bundle; +import android.view.MenuItem; +import android.view.View; +import android.widget.AdapterView; +import android.widget.ArrayAdapter; +import android.widget.EditText; +import android.widget.SeekBar; +import android.widget.Spinner; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.LogManager; +import java.util.logging.Logger; + +public class MainActivity extends AppCompatActivity implements SeekBar.OnSeekBarChangeListener, + AdapterView.OnItemSelectedListener, View.OnClickListener, Mimic3TTSEngineWeb.OnVoicesLoadedListener, + SharedPreferences.OnSharedPreferenceChangeListener, Mimic3TTSEngineWeb.OnLoadedListener, Mimic3TTSEngineWeb.OnErrorListener { + List Voices; + HashMap>> Languages; + HashMap VoiceMap; + + private String SelectedLanguage; + private String SelectedVoice; + private String SelectedSpeaker; + private int SpeechSpeed; + private int AudioVolatility; + private int PhonemeVolatility; + SharedPreferences SharedPreferences; + + private final Logger _Logger; + + public MainActivity() { + _Logger = Logger.getLogger(this.getClass().toString()); + 
LogManager.getLogManager().addLogger(_Logger); + } + // Restores the persisted selections, starts the TTS service when no instance is running yet and wires up the UI listeners. + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + // Inflate the layout first: findViewById(R.id.toolbar) returns null until the content view is set (assumes the toolbar is declared in activity_main). + setContentView(R.layout.activity_main); + setSupportActionBar(findViewById(R.id.toolbar)); + + SharedPreferences = PreferenceManager.getDefaultSharedPreferences(Mimic3TTSEngineWrapperApp.getStorageContext()); + SharedPreferences.registerOnSharedPreferenceChangeListener(this); + SelectedLanguage = SharedPreferences.getString(PREF_LANGUAGE, ""); + SelectedVoice = SharedPreferences.getString(PREF_VOICE, ""); + SelectedSpeaker = SharedPreferences.getString(PREF_SPEAKER, ""); + SpeechSpeed = SharedPreferences.getInt(PREF_SPEED, 100); + AudioVolatility = SharedPreferences.getInt(PREF_AUDIO_VOLATILITY, 677); + PhonemeVolatility = SharedPreferences.getInt(PREF_PHONEME_VOLATILITY, 800); + // Publish the stored server address, then either start the service or reuse the voices of the running instance. + Mimic3TTSEngineWeb.s_ServerAddress = SharedPreferences.getString(PREF_SERVER_ADDRESS, ""); + Mimic3TTSEngineWeb.addLoadedListener(this); + if (Mimic3TTSEngineWeb.s_RunningService == null) { + Intent startIntent = new Intent(MainActivity.this, de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWeb.class); + startIntent.putExtra(PREF_SERVER_ADDRESS, Mimic3TTSEngineWeb.s_ServerAddress); + startService(startIntent); + } else { + onVoicesLoaded(Mimic3TTSEngineWeb.s_RunningService.getMimicVoices()); + } + + adjustMissingServerAddressError(); + + findViewById(R.id.btn_speak).setOnClickListener(this); + findViewById(R.id.lbl_audio_volatility).setOnClickListener(this); + findViewById(R.id.lbl_phonemeVolatility).setOnClickListener(this); + SeekBar s = findViewById(R.id.speedBar); + s.setOnSeekBarChangeListener(this); + s.setProgress(SpeechSpeed); + + s = findViewById(R.id.audioVolatilityBar); + s.setOnSeekBarChangeListener(this); + s.setProgress(AudioVolatility); + + s = findViewById(R.id.phonemeVolatilityBar); + s.setOnSeekBarChangeListener(this); + s.setProgress(PhonemeVolatility); + } + + private void 
adjustMissingServerAddressError() { + if (Mimic3TTSEngineWeb.s_ServerAddress == null || Mimic3TTSEngineWeb.s_ServerAddress.equals("") || Mimic3TTSEngineWeb.s_ServerAddress.equals("https://") || Mimic3TTSEngineWeb.s_ServerAddress.equals("http://")) + findViewById(R.id.server_missing).setVisibility(View.VISIBLE); + else + findViewById(R.id.server_missing).setVisibility(View.GONE); + } + + @Override + protected void onDestroy() { + super.onDestroy(); + Mimic3TTSEngineWeb.removeLoadedListener(this); + SharedPreferences.unregisterOnSharedPreferenceChangeListener(this); + if (Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.removeVoicesLoadedListener(this); + Mimic3TTSEngineWeb.s_RunningService.removeErrorListener(this); + } + } + + private void setLanguageItems(String[] languages) { + Arrays.sort(languages); + Spinner languageDD = findViewById(R.id.language); + languageDD.setAdapter(new ArrayAdapter<>(this, androidx.appcompat.R.layout.support_simple_spinner_dropdown_item, languages)); + for (int i = 0; i < languages.length; i++) { + if (languages[i].equals(SelectedLanguage)) + languageDD.setSelection(i); + } + } + + public void setSelectedLanguage(String language) { + if(!language.equals(SelectedLanguage)) { + SelectedLanguage = language; + SharedPreferences.edit().putString(PREF_LANGUAGE, SelectedLanguage).apply(); + + if (Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(true); + } + } + } + + private void setVoiceItems(String[] voices) { + Spinner voicesDD = findViewById(R.id.voices); + voicesDD.setAdapter(new ArrayAdapter<>(this, androidx.appcompat.R.layout.support_simple_spinner_dropdown_item, voices)); + for (int i = 0; i < voices.length; i++) { + if (voices[i].equals(SelectedVoice)) + voicesDD.setSelection(i); + } + } + + public void setSelectedVoice(String voice) { + if(!voice.equals(SelectedVoice)) { + SelectedVoice = voice; + SharedPreferences.edit().putString(PREF_VOICE, 
SelectedVoice).apply(); + + if (Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(true); + MimicVoice mimicVoice = VoiceMap.get(voice); + if(mimicVoice != null && (mimicVoice.speakers == null || mimicVoice.speakers.length == 0)) + synthesizeDefaultStrings(); + } + } + } + + private void setSpeakerItems(String[] speakers) { + Spinner speakerDD = findViewById(R.id.speakers); + speakerDD.setAdapter(new ArrayAdapter<>(this, androidx.appcompat.R.layout.support_simple_spinner_dropdown_item, speakers)); + for (int i = 0; i < speakers.length; i++) { + if (speakers[i].equals(SelectedSpeaker)) + speakerDD.setSelection(i); + } + } + + public void setSelectedSpeaker(String speaker) { + if(!speaker.equals(SelectedSpeaker)) { + SelectedSpeaker = speaker; + SharedPreferences.edit().putString(PREF_SPEAKER, SelectedSpeaker).apply(); + + if (Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(true); + synthesizeDefaultStrings(); + } + } + } + + private void synthesizeDefaultStrings() { + if (Mimic3TTSEngineWeb.s_RunningService != null) { + _Logger.info("synthesizing default strings"); + HashMap defaultStrings = new HashMap() {{ + put("default_no_connection", getString(R.string.default_no_connection)); + }}; + + String voice = SelectedVoice; + if(SelectedSpeaker != null && !SelectedSpeaker.equals("")) + voice += "#" + SelectedSpeaker; + for(Map.Entry s : defaultStrings.entrySet()) { + Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(s.getValue(), voice, SpeechSpeed, new SynthesisListener(false), s.getKey()); + } + } + } + + @Override + public void onItemSelected(AdapterView parent, View view, int position, long id) { + if (parent.getId() == R.id.language) { + setSelectedLanguage((String) parent.getItemAtPosition(position)); + HashMap> voices = Languages.get(SelectedLanguage); + if (voices != null) { + setVoiceItems(voices.keySet().toArray(new String[0])); + } + } else if 
(parent.getId() == R.id.voices) { + setSelectedVoice((String) parent.getItemAtPosition(position)); + HashMap> voices = Languages.get(SelectedLanguage); + if (voices != null) { + List speakers = voices.get(SelectedVoice); + if (speakers != null) { + setSpeakerItems(speakers.toArray(new String[0])); + } + } + } else if (parent.getId() == R.id.speakers) { + setSelectedSpeaker((String) parent.getItemAtPosition(position)); + } + } + + @Override + public void onNothingSelected(AdapterView parent) { + if (parent.getId() == R.id.language) { + SelectedLanguage = null; + SelectedVoice = null; + SelectedSpeaker = null; + setVoiceItems(new String[0]); + setSpeakerItems(new String[0]); + + if (Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(true); + } + } else if (parent.getId() == R.id.voices) { + SelectedVoice = null; + SelectedSpeaker = null; + setSpeakerItems(new String[0]); + + if (Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(true); + } + } else if (parent.getId() == R.id.speakers) { + SelectedSpeaker = null; + + if (Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(true); + } + } + } + + @Override + public void onClick(View view) { + if (view.getId() == R.id.btn_speak) { + EditText input = findViewById(R.id.testText); + String inputText = input.getText().toString(); + if (Mimic3TTSEngineWeb.s_RunningService != null && !inputText.equals("") && SelectedVoice != null && !SelectedVoice.equals("")) { + String voice = SelectedVoice; + if (SelectedSpeaker != null && !SelectedSpeaker.equals("")) + voice += "#" + SelectedSpeaker; + Mimic3TTSEngineWeb.s_RunningService.dispatchSynthesisRequest(inputText, voice, SpeechSpeed, new SynthesisListener(true), null); + } + } else if(view.getId() == R.id.lbl_audio_volatility) { + new AlertDialog.Builder(this) + .setTitle(R.string.lbl_audio_volatility) + 
.setMessage(R.string.lbl_audio_volatility_tooltip) + .setPositiveButton(R.string.ok, null) + .show(); + } else if(view.getId() == R.id.lbl_phonemeVolatility) { + new AlertDialog.Builder(this) + .setTitle(R.string.lbl_phoneme_volatility) + .setMessage(R.string.lbl_phoneme_volatility_tooltip) + .setPositiveButton(R.string.ok, null) + .show(); + } + } + + public void onMenuItemClick(@NonNull MenuItem menuItem) { + if (menuItem.getItemId() == R.id.menu_settings) { + startActivity(new Intent(MainActivity.this, SettingsActivity.class)); + } + } + + @Override + public void onVoicesLoaded(List voices) { + MainActivity main = this; + runOnUiThread(() -> { + Languages = new HashMap<>(); + VoiceMap = new HashMap<>(); + Voices = voices; + for (MimicVoice voice : Voices) { + if (!Languages.containsKey(voice.language)) { + Languages.put(voice.language, new HashMap<>()); + } + HashMap> voiceMap = Languages.get(voice.language); + if(voiceMap == null) + continue; + + List speakers = new ArrayList<>(); + if (voice.speakers != null) + speakers = Arrays.asList(voice.speakers); + voiceMap.put(voice.key, speakers); + VoiceMap.put(voice.key, voice); + } + + Spinner languageDD = findViewById(R.id.language); + languageDD.setOnItemSelectedListener(main); + Spinner voicesDD = findViewById(R.id.voices); + voicesDD.setOnItemSelectedListener(main); + Spinner speakerDD = findViewById(R.id.speakers); + speakerDD.setOnItemSelectedListener(main); + + String[] locales = Resources.getSystem().getAssets().getLocales(); + setLanguageItems(Languages.keySet().toArray(new String[0])); + }); + } + + @Override + public void onSharedPreferenceChanged(SharedPreferences sharedPreferences, String key) { + if (key.equals(PREF_SERVER_ADDRESS)) { + Mimic3TTSEngineWeb.s_ServerAddress = sharedPreferences.getString(PREF_SERVER_ADDRESS, ""); + if (Mimic3TTSEngineWeb.s_RunningService != null) + Mimic3TTSEngineWeb.s_RunningService.triggerLoadVoices(); + adjustMissingServerAddressError(); + } else if 
(key.equals(PREF_CACHE_ACTIVATE)) { + boolean cacheActive = sharedPreferences.getBoolean(PREF_CACHE_ACTIVATE, true); + if(!cacheActive && Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.setCacheSize(0); + } + } + } + + @Override + public void onLoaded() { + Mimic3TTSEngineWeb.s_RunningService.addVoicesLoadedListener(this); + } + + @Override + public void onProgressChanged(SeekBar seekBar, int i, boolean b) { } + + @Override + public void onStartTrackingTouch(SeekBar seekBar) { } + + @Override + public void onStopTrackingTouch(SeekBar seekBar) { + int value = seekBar.getProgress(); + boolean changed = false; + if (seekBar.getId() == R.id.speedBar) { + SpeechSpeed = value; + SharedPreferences.edit().putInt(PREF_SPEED, SpeechSpeed).apply(); + changed = true; + } else if (seekBar.getId() == R.id.audioVolatilityBar) { + AudioVolatility = value; + SharedPreferences.edit().putInt(PREF_AUDIO_VOLATILITY, AudioVolatility).apply(); + changed = true; + } else if (seekBar.getId() == R.id.phonemeVolatilityBar) { + PhonemeVolatility = value; + SharedPreferences.edit().putInt(PREF_PHONEME_VOLATILITY, PhonemeVolatility).apply(); + changed = true; + } + + if(changed && Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(true); + if(SelectedVoice != null && !SelectedVoice.equals("") && VoiceMap.containsKey(SelectedVoice)) { + MimicVoice voice = VoiceMap.get(SelectedVoice); + if(voice != null && (voice.speakers == null || voice.speakers.length == 0 || (SelectedSpeaker != null && !SelectedSpeaker.equals("")))) { + synthesizeDefaultStrings(); + } + } + } + } + + @Override + public void onError(String error) { + Context main = this; + runOnUiThread(() -> new AlertDialog.Builder(main) + .setTitle(R.string.tts_server_error) + .setMessage(error) + .setPositiveButton(R.string.ok, null) + .show()); + } +} \ No newline at end of file diff --git 
a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/Mimic3TTSEngine.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/Mimic3TTSEngine.java new file mode 100644 index 0000000..378d68d --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/Mimic3TTSEngine.java @@ -0,0 +1,158 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import android.speech.tts.SynthesisCallback; +import android.speech.tts.SynthesisRequest; +import android.speech.tts.TextToSpeech; +import android.speech.tts.TextToSpeechService; +import android.speech.tts.Voice; +//import com.chaquo.python.PyObject; +//import com.chaquo.python.Python; +//import com.chaquo.python.android.AndroidPlatform; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; + +public class Mimic3TTSEngine extends TextToSpeechService { + + //private final PyObject TtsModule; + private List Voices = new ArrayList<>(); // stays empty while the chaquopy loader in the constructor is commented out; avoids a NullPointerException in every method that iterates it + private MimicVoice CurrentVoice; + private String CurrentSpeaker; + + public Mimic3TTSEngine() { + /* + if(!Python.isStarted()) + Python.start(new AndroidPlatform(this)); + + Python py = Python.getInstance(); + TtsModule = py.getModule("tts"); + List pyVoices = TtsModule.callAttr("init", "--voices") + .asList(); + List voices = new ArrayList<>(); + for (PyObject pyVoice: pyVoices) { + voices.add(pyVoice.toJava(MimicVoice.class)); + } + Voices = voices; + */ + } + // Returns LANG_COUNTRY_VAR_AVAILABLE when a voice of the language has a speaker equal to variant, LANG_COUNTRY_AVAILABLE when only the language matches, otherwise LANG_NOT_SUPPORTED. + @Override + protected int onIsLanguageAvailable(String lang, String country, String variant) { + for(MimicVoice voice: Voices) { + if(voice.language.equalsIgnoreCase(lang)) { + for (String speaker : voice.speakers != null ? voice.speakers : new String[0]) { // speakers may be null, as the sibling methods already assume + if(speaker.equalsIgnoreCase(variant)) { + return TextToSpeech.LANG_COUNTRY_VAR_AVAILABLE; + } + } + return TextToSpeech.LANG_COUNTRY_AVAILABLE; + } + } + return TextToSpeech.LANG_NOT_SUPPORTED; + } + + @Override + public int onIsValidVoiceName (String voiceName) { + for(MimicVoice voice : Voices) { + String currVoiceName = voice.key; + if(voice.speakers != null 
&& voice.speakers.length > 0) { + for (String speaker : voice.speakers) { + currVoiceName = voice.key + "#" + speaker; + if(currVoiceName.equalsIgnoreCase(voiceName)) + return TextToSpeech.SUCCESS; + } + } else { + if(currVoiceName.equalsIgnoreCase(voiceName)) + return TextToSpeech.SUCCESS; + } + } + return TextToSpeech.ERROR; + } + + @Override + public List onGetVoices () { + List androidVoices = new ArrayList<>(); + for(MimicVoice voice : Voices) { + Voice androidVoice = new Voice(voice.key, new Locale(voice.language), Voice.QUALITY_NORMAL, Voice.LATENCY_NORMAL, false, new HashSet<>()); + if(voice.speakers != null && voice.speakers.length > 0) { + for (String speaker : voice.speakers) { + androidVoice = new Voice(voice.key + "#" + speaker, new Locale(voice.language), Voice.QUALITY_NORMAL, Voice.LATENCY_NORMAL, false, new HashSet<>()); + androidVoices.add(androidVoice); + } + } else { + androidVoices.add(androidVoice); + } + } + return androidVoices; + } + + @Override + protected String[] onGetLanguage() { + if(CurrentVoice == null) + return new String[0]; + return new String[] {CurrentVoice.language, "", CurrentSpeaker}; + } + + @Override + protected int onLoadLanguage(String lang, String country, String variant) { + return onIsLanguageAvailable(lang, country, variant); + } + + @Override + protected void onStop() { } + + @Override + protected void onSynthesizeText(SynthesisRequest synthesisRequest, SynthesisCallback synthesisCallback) { + /* + synthesisCallback.start(22050, AudioFormat.ENCODING_PCM_16BIT, 2); + String ssml = String.format("%s", synthesisRequest.getSpeechRate(), synthesisRequest.getCharSequenceText().toString()); + String[] args = new String[] { "--voice", synthesisRequest.getVoiceName(), "--ssml", ssml }; + PyObject result = TtsModule.callAttr("init", (Object) args); + Set resultSet = result.asSet(); + byte[] bytes = new byte[resultSet.size()]; + int i = 0; + for(PyObject b : resultSet) { + bytes[i] = b.toByte(); + i++; + } + int bufferSize = 
synthesisCallback.getMaxBufferSize(); + for(i = 0; i bytes.length) + end = bytes.length; + byte[] bytesSlice = Arrays.copyOfRange(bytes, offset, end); + synthesisCallback.audioAvailable(bytesSlice, 0, bytesSlice.length); + } + synthesisCallback.done(); + */ + } + + @Override + public String onGetDefaultVoiceNameFor(String lang, String country, String variant) { + for(MimicVoice voice : Voices) { + if(!voice.language.equalsIgnoreCase(lang)) + continue; + String currVoiceName = voice.key + " | default"; + if(voice.speakers != null && voice.speakers.length > 0) { + for (String speaker : voice.speakers) { + currVoiceName = voice.key + " | " + speaker; + if(speaker.equalsIgnoreCase(variant)) + return currVoiceName; + } + } else { + return currVoiceName; + } + } + return null; + } + + public List GetMimicVoices() { + List copy = new ArrayList<>(Voices.size()); + for(int i = 0; i voices); + } + + public interface OnLoadedListener { + void onLoaded(); + } + + public interface OnErrorListener { + void onError(String error); + } + + public static class CacheFile { + ArrayList Cache; + ArrayList> SpecialCache; + ArrayList Voices; + } + + public static class KVP { + public K Key; + public V Value; + public KVP(K key, V value) { + Key = key; + Value = value; + } + } + + public static Mimic3TTSEngineWeb s_RunningService; + public static String s_ServerAddress; + private static final List s_OnLoadedListeners = new ArrayList<>(); + private List Voices = new ArrayList<>(); + + private final Logger _Logger; + private Thread T; + private boolean Running; + private boolean FetchVoices = false; + + private long MaxCacheSizeInB = 2L * 1024 * 1024 * 1024; + private float MaxCacheSizeInGB = 2; + private long CurrentCacheSize = 0; + private final Lock CacheLock = new ReentrantLock(); + + private final List OnVoicesLoadedListeners = new ArrayList<>(); + private final List OnErrorListeners = new ArrayList<>(); + + private boolean SynthesisRequest = false; + private String SynthesisText; + 
private int SynthesisSpeechRate; + private String SynthesisVoice; + private String SynthesisSpecialKey = null; + private SynthesisCallback Callback; + + private final Map LocaleMap; + private final Map CountryMap; + + private Map Cache = new HashMap<>(); + private final Map SpecialCache = new HashMap<>(); + + private final Timer cacheFlushInterval; + + public Mimic3TTSEngineWeb() { + _Logger = Logger.getLogger("de.bentigorlich.mimic3ttsenginewrapper.Mimic3TTSEngineWeb"); + LogManager.getLogManager().addLogger(_Logger); + _Logger.info("Instantiated Mimic3TTSEngineWeb"); + + String[] languages = Locale.getISOLanguages(); + LocaleMap = new HashMap<>(languages.length); + CountryMap = new HashMap<>(languages.length); + for (String language : languages) { + Locale locale = new Locale(language); + LocaleMap.put(locale.getISO3Language(), locale); + if(!locale.getISO3Country().equals("")) + CountryMap.put(locale.getISO3Country(), locale); + } + + cacheFlushInterval = new Timer(); + cacheFlushInterval.schedule(new TimerTask() { + @Override + public void run() { + saveCache(); + } + }, 30000, 600000); + } + + @Override + public void onCreate() { + super.onCreate(); + _Logger.info("Created Mimic3TTSEngineWeb"); + } + + @Override + public int onStartCommand(Intent intent, int flags, int startId) { + _Logger.info("Started Mimic3TTSEngineWeb"); + if(intent != null && intent.getAction() != null && !intent.getAction().equals("")) + _Logger.info("got action: " + intent.getAction()); + s_RunningService = this; + Running = true; + FetchVoices = true; + if(intent != null) { + String address = intent.getStringExtra("server_address"); + if (address != null && !address.equals("")) + s_ServerAddress = address; + } + + try { + buildCache(); + if(Voices != null && Voices.size() > 0) { + FetchVoices = false; + } + } catch (Exception ex) { + _Logger.severe("An unhandled exception occurred: " + ex.getClass().getName() + ": " + ex.getMessage()); + for(StackTraceElement el : ex.getStackTrace()) { 
+ _Logger.warning("at: " + el.toString()); + } + } + T = new Thread(this::main); + T.start(); + return super.onStartCommand(intent, flags, startId); + } + + @Override + public boolean stopService(Intent name) { + _Logger.info("stopping TTS service"); + s_RunningService = null; + Running = false; + cacheFlushInterval.cancel(); + saveCache(); + return super.stopService(name); + } + + @Override + public void onDestroy() { + _Logger.info("destroying TTS service"); + cacheFlushInterval.cancel(); + saveCache(); + super.onDestroy(); + } + + private void main() { + for (OnLoadedListener listener : s_OnLoadedListeners) { + listener.onLoaded(); + } + + if(Voices != null && Voices.size() > 0) { + for(OnVoicesLoadedListener listener : OnVoicesLoadedListeners) { + listener.onVoicesLoaded(Voices); + } + } + + while(Running) { + try { + if (FetchVoices) { + loadVoices(); + FetchVoices = false; + } else if (SynthesisRequest) { + synthesizeText(SynthesisText, SynthesisVoice, SynthesisSpeechRate, Callback, SynthesisSpecialKey); + SynthesisRequest = false; + } else { + try { + Thread.sleep(100); + } catch (InterruptedException ignored) { + } + } + } catch (Exception ex) { + _Logger.severe("An unhandled exception occurred: " + ex.getClass().getName() + ": " + ex.getMessage()); + for(StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at: " + el.toString()); + } + } + } + } + + private void loadVoices() { + List voices = new ArrayList<>(); + if(s_ServerAddress != null) { + String slash = ""; + if (!s_ServerAddress.endsWith("/")) + slash = "/"; + _Logger.info("Fetching Voices from " + s_ServerAddress + slash + "api/voices"); + try { + URL url = new URL(s_ServerAddress + slash + "api/voices"); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + try { + BufferedInputStream in = new BufferedInputStream(conn.getInputStream()); + _Logger.info("Got raw data"); + Type listType = new TypeToken>() {}.getType(); + Gson gson = new Gson(); + String rawData = new 
BufferedReader(new InputStreamReader(in)).lines().collect(Collectors.joining()); + voices = gson.fromJson(rawData, listType); + _Logger.info("Got Voices"); + } finally { + conn.disconnect(); + } + } catch (MalformedURLException ex) { + _Logger.severe("Malformed server url: " + ex.getMessage()); + ex.printStackTrace(); + for (OnErrorListener listener: OnErrorListeners) { + listener.onError(ex.getMessage()); + } + } catch (IOException ex) { + _Logger.severe("Connection error: " + ex.getMessage()); + ex.printStackTrace(); + for (StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at " + el.toString()); + } + for (OnErrorListener listener: OnErrorListeners) { + listener.onError(ex.getMessage()); + } + } catch (JsonSyntaxException | IllegalStateException ex) { + _Logger.severe("Json error: " + ex.getMessage()); + ex.printStackTrace(); + for (StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at " + el.toString()); + } + for (OnErrorListener listener: OnErrorListeners) { + listener.onError(ex.getMessage()); + } + } catch (Exception ex) { + _Logger.severe("Error: " + ex.getMessage()); + ex.printStackTrace(); + for (StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at " + el.toString()); + } + for (OnErrorListener listener: OnErrorListeners) { + listener.onError(ex.getMessage()); + } + } + } + Voices = voices; + voices = getMimicVoices(); + for (OnVoicesLoadedListener listener: OnVoicesLoadedListeners) { + listener.onVoicesLoaded(voices); + } + } + + @Override + protected int onIsLanguageAvailable(String lang, String country, String variant) { + if(LocaleMap.containsKey(lang)) + lang = Objects.requireNonNull(LocaleMap.get(lang)).getLanguage(); + if(CountryMap.containsKey(country)) + country = Objects.requireNonNull(CountryMap.get(country)).getCountry(); + + for(MimicVoice voice: Voices) { + String[] voiceParts = voice.language.split("[-_/]"); + if (voiceParts.length >= 1 && voiceParts[0].equalsIgnoreCase(lang)) { + 
if(voiceParts.length >= 2 && voiceParts[1].equalsIgnoreCase(country)) { + return TextToSpeech.LANG_COUNTRY_AVAILABLE; + } + return TextToSpeech.LANG_AVAILABLE; + } + } + _Logger.warning("we don't support: " + lang + "-" + country + "-" + variant); + return TextToSpeech.LANG_NOT_SUPPORTED; + } + + @Override + public int onIsValidVoiceName (String voiceName) { + for(MimicVoice voice : Voices) { + String currVoiceName = voice.key; + if(voice.speakers != null && voice.speakers.length > 0) { + for (String speaker : voice.speakers) { + currVoiceName = voice.key + "#" + speaker; + if(currVoiceName.equalsIgnoreCase(voiceName)) + return TextToSpeech.SUCCESS; + } + } else { + if(currVoiceName.equalsIgnoreCase(voiceName)) + return TextToSpeech.SUCCESS; + } + } + return TextToSpeech.ERROR; + } + + @Override + public List onGetVoices () { + List androidVoices = new ArrayList<>(); + for(MimicVoice voice : Voices) { + Voice androidVoice = new Voice(voice.key, new Locale(voice.language), Voice.QUALITY_NORMAL, Voice.LATENCY_NORMAL, false, new HashSet<>()); + if(voice.speakers != null && voice.speakers.length > 0) { + for (String speaker : voice.speakers) { + androidVoice = new Voice(voice.key + "#" + speaker, new Locale(voice.language), Voice.QUALITY_NORMAL, Voice.LATENCY_NORMAL, false, new HashSet<>()); + androidVoices.add(androidVoice); + } + } else { + androidVoices.add(androidVoice); + } + } + return androidVoices; + } + + @Override + protected String[] onGetLanguage() { + SharedPreferences preferences = PreferenceManager.getDefaultSharedPreferences(Mimic3TTSEngineWrapperApp.getStorageContext()); + String language = preferences.getString(Mimic3TTSEngineWrapperApp.PREF_LANGUAGE, "en_US"); + _Logger.info("Someone requested the current language: " + language); + return language.split("[-_]"); + } + + @Override + protected int onLoadLanguage(String lang, String country, String variant) { + _Logger.info("We shall load: " + lang + "-" + country + "-" + variant); + return 
onIsLanguageAvailable(lang, country, variant); + } + + @Override + protected void onStop() { } + + @Override + protected void onSynthesizeText(SynthesisRequest synthesisRequest, SynthesisCallback synthesisCallback) { + synthesizeText(synthesisRequest.getCharSequenceText().toString(), synthesisRequest.getVoiceName(), synthesisRequest.getSpeechRate(), synthesisCallback, null); + } + + protected void synthesizeText(String text, @Nullable String voice, int speechRate, SynthesisCallback synthesisCallback, String specialKey) { + SharedPreferences preferences = PreferenceManager.getDefaultSharedPreferences(Mimic3TTSEngineWrapperApp.getStorageContext()); + String prefVoiceKey = preferences.getString(Mimic3TTSEngineWrapperApp.PREF_VOICE, ""); + String speaker = preferences.getString(Mimic3TTSEngineWrapperApp.PREF_SPEAKER, null); + MimicVoice prefVoice = null; + MimicVoice givenVoice = null; + boolean canUseCache = false; + for(MimicVoice currVoice : Voices) { + if(currVoice.key.equals(prefVoiceKey)) { + prefVoice = currVoice; + canUseCache = true; + } else if (voice != null && currVoice.key.equals(voice)) { + givenVoice = currVoice; + } + } + if(givenVoice == null || (prefVoice != null && givenVoice.language.equals(prefVoice.language))) { + voice = prefVoiceKey; + if(speaker != null) + voice += "#" + speaker; + } + + CacheEntry entry = getCacheEntry(text); + boolean specialKeySet = specialKey != null && !specialKey.equals(""); + boolean useCache = canUseCache && ((specialKeySet && SpecialCache.containsKey(specialKey)) || entry != null); + + synthesisCallback.start(22050, AudioFormat.ENCODING_PCM_16BIT, 1); + + if(s_ServerAddress != null) { + if(!useCache) { + synthesizeTextFromUrl(preferences, speechRate, voice, text, synthesisCallback, specialKey); + } else { + try { + synthesizeTextFromCache(specialKeySet, specialKey, text, synthesisCallback); + } catch (FileNotFoundException e) { + synthesizeTextFromUrl(preferences, speechRate, voice, text, synthesisCallback, 
specialKey); + } + } + } + } + + private void synthesizeTextFromUrl(SharedPreferences preferences, int speechRate, String voice, String text, SynthesisCallback synthesisCallback, String specialKey) { + try { + String slash = ""; + if (!s_ServerAddress.endsWith("/")) + slash = "/"; + float lengthScale = (float)1 / ((float)speechRate/100); + float noiseScale = (float)preferences.getInt(Mimic3TTSEngineWrapperApp.PREF_AUDIO_VOLATILITY, 667) / 1000; + float noiseW = (float)preferences.getInt(Mimic3TTSEngineWrapperApp.PREF_PHONEME_VOLATILITY, 800) / 1000; + String urlString = s_ServerAddress + slash + "api/tts?ssml=0&audioTarget=client&noiseScale=" + noiseScale + "&noiseW=" + noiseW + "&lengthScale=" + lengthScale + "&voice=" + URLEncoder.encode(voice, StandardCharsets.UTF_8.toString()); + _Logger.info("Synthesizing text with " + urlString + " : " + text); + URL url = new URL(urlString); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + try { + byte[] outputBuffer = text.getBytes(StandardCharsets.UTF_8); + conn.setRequestMethod("POST"); + conn.setDoOutput(true); + conn.setFixedLengthStreamingMode(outputBuffer.length); + BufferedOutputStream out = new BufferedOutputStream(conn.getOutputStream()); + out.write(outputBuffer, 0, outputBuffer.length); + out.close(); + + int status = conn.getResponseCode(); + String message = conn.getResponseMessage(); + InputStream in = new BufferedInputStream(conn.getInputStream()); + int nRead; + byte[] data = new byte[16384]; + ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream(); + + _Logger.info("Got raw data"); + while ((nRead = in.read(data, 0, data.length)) != -1) { + synthesisCallback.audioAvailable(data, 0, nRead); + byteBuffer.write(data, 0, nRead); + } + in.close(); + _Logger.info("Got audio"); + synthesisCallback.done(); + byte[] completeData = byteBuffer.toByteArray(); + CacheEntry cacheEntry = new CacheEntry(); + cacheEntry.Text = text; + cacheEntry.ByteSize = completeData.length; + 
addToCache(completeData, cacheEntry, specialKey); + } finally { + conn.disconnect(); + } + } catch (MalformedURLException ex) { + _Logger.severe("Malformed server url: " + ex.getMessage()); + ex.printStackTrace(); + synthesisCallback.error(); + } catch (IOException ex) { + _Logger.severe("Connection error: " + ex.getMessage()); + ex.printStackTrace(); + for (StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at " + el.toString()); + } + + if(SpecialCache.containsKey("default_no_connection")) { + CacheEntry noConn = SpecialCache.get("default_no_connection"); + File noConnFile = new File(Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(), "default_no_connection"); + if(noConnFile.exists()) { + try { + InputStream in = Files.newInputStream(noConnFile.toPath()); + ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream(); + int nRead; + byte[] data = new byte[16384]; + _Logger.info("Got raw data"); + while ((nRead = in.read(data, 0, data.length)) != -1) { + synthesisCallback.audioAvailable(data, 0, nRead); + byteBuffer.write(data, 0, nRead); + } + _Logger.info("Got audio"); + synthesisCallback.done(); + in.close(); + } catch (IOException ex2) { + synthesisCallback.error(); + _Logger.severe("Cache error loading default_no_connection: " + ex2.getMessage()); + ex.printStackTrace(); + for (StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at: " + el.toString()); + } + } + } else { + synthesisCallback.error(); + _Logger.severe("default_no_connection was in cache, but file doesn't exist"); + } + } else { + synthesisCallback.error(); + _Logger.severe("default_no_connection was not in cache"); + } + } + } + + private void synthesizeTextFromCache(boolean specialKeySet, String specialKey, String text, SynthesisCallback synthesisCallback) throws FileNotFoundException { + String key; + if (!specialKeySet) { + try { + key = Mimic3TTSEngineWrapperApp.getSha256Hex(text); + } catch (NoSuchAlgorithmException e) { + synthesisCallback.error(); + 
return; + } + } else { + key = specialKey; + } + File cacheFile = new File(Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(), key); + if(!cacheFile.exists()) + throw new FileNotFoundException(key); + + _Logger.info("Synthesizing text with cache: " + text); + try { + InputStream in = new BufferedInputStream(Files.newInputStream(cacheFile.toPath())); + int nRead; + byte[] data = new byte[16384]; + + _Logger.info("Got raw data"); + while ((nRead = in.read(data, 0, data.length)) != -1) { + synthesisCallback.audioAvailable(data, 0, nRead); + } + synthesisCallback.done(); + } catch (IOException ex) { + _Logger.severe("IO error: " + ex.getMessage()); + ex.printStackTrace(); + for (StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at " + el.toString()); + } + synthesisCallback.error(); + } + } + + private CacheEntry getCacheEntry(String text) { + try { + String key = Mimic3TTSEngineWrapperApp.getSha256Hex(text); + if(Cache.containsKey(key)) + return Cache.get(key); + } catch (NoSuchAlgorithmException e) { + return null; + } + return null; + } + + public void dispatchSynthesisRequest(@NonNull String text, @NonNull String voice, int speechRate, @NonNull SynthesisCallback synthesisCallback, @Nullable String specialKey) { + SynthesisText = text; + SynthesisSpeechRate = speechRate; + SynthesisVoice = voice; + Callback = synthesisCallback; + SynthesisRequest = true; + SynthesisSpecialKey = specialKey; + } + + @Override + public String onGetDefaultVoiceNameFor(String lang, String country, String variant) { + if(LocaleMap.containsKey(lang)) + lang = Objects.requireNonNull(LocaleMap.get(lang)).getLanguage(); + if(CountryMap.containsKey(country)) + country = Objects.requireNonNull(CountryMap.get(country)).getCountry(); + + MimicVoice fallback = null; + for(MimicVoice voice : Voices) { + String[] voiceParts = voice.language.split("[-_/]"); + if (voiceParts.length >= 1 && voiceParts[0].equalsIgnoreCase(lang)) { + if(voiceParts.length >= 2 && 
voiceParts[1].equalsIgnoreCase(country)) { + return voice.key; + } + if(fallback == null) + fallback = voice; + } + } + if(fallback != null) + return fallback.key; + _Logger.warning("couldn't find a voice name for " + lang + "-" + country + "-" + variant); + return null; + } + + public List getMimicVoices() { + List copy = new ArrayList<>(); + for(int i = 0; i(); + ArrayList> specialCache = new ArrayList<>(); + for(Map.Entry entry : SpecialCache.entrySet()) { + specialCache.add(new KVP<>(entry.getKey(), entry.getValue())); + } + cf.SpecialCache.addAll(specialCache); + cf.Cache = new ArrayList<>(); + cf.Cache.addAll(Cache.values()); + cf.Voices = (ArrayList) Voices; + String json = gson.toJson(cf); + File f = new File(Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(), "cache.json"); + try { + if (!f.exists()) { + if(!f.createNewFile()) + _Logger.severe("couldn't create cache.json... cache will be lost"); + } + + if(f.exists()) { + FileWriter writer = new FileWriter(f, false); + writer.write(json); + writer.close(); + } + } catch (IOException ignored) {} + } + + private void buildCache() { + _Logger.info("building cache"); + File cacheDir = Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(); + File cacheJson = null; + Map files = new HashMap<>(); + if(cacheDir != null && !cacheDir.isFile()) { + for(File f : cacheDir.listFiles()) { + if(f.getName().equals("cache.json")) + cacheJson = f; + else + files.put(f.getName(), f); + } + } + if(cacheJson != null) { + Gson gson = new Gson(); + try { + BufferedReader reader = new BufferedReader(new FileReader(cacheJson)); + String json = reader.lines().collect(Collectors.joining()); + CacheFile cacheFile = gson.fromJson(json, CacheFile.class); + try { + CacheLock.lock(); + Cache.clear(); + CurrentCacheSize = 0; + for (CacheEntry entry : cacheFile.Cache) { + String entryId = Mimic3TTSEngineWrapperApp.getSha256Hex(entry.Text); + if (files.containsKey(entryId)) { + File f = files.get(entryId); + if (f != null) { + 
Cache.put(entryId, entry); + CurrentCacheSize += f.length(); + } + } + } + for (KVP entry : cacheFile.SpecialCache) { + if(files.containsKey(entry.Key)) { + File f = files.get(entry.Key); + if(f != null) { + SpecialCache.put(entry.Key, entry.Value); + } + } + } + Voices = cacheFile.Voices; + _Logger.info("built cache, " + Cache.size() + " entries in cache, " + SpecialCache.size() + " entries in special cache and " + Voices.size() + " voices"); + } catch (NoSuchAlgorithmException ignored) { + } finally { + CacheLock.unlock(); + } + } catch (FileNotFoundException ignored) { } + } + } + + private void addToCache(byte[] data, CacheEntry cacheEntry, @Nullable String specialKey) throws IllegalArgumentException { + if (cacheEntry.ByteSize != data.length){ + throw new IllegalArgumentException("cacheEntry.ByteSize != actual byte size"); + } + + File cacheDir = Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(); + try { + String key; + if (specialKey != null && !specialKey.equals("")) + key = specialKey; + else + key = Mimic3TTSEngineWrapperApp.getSha256Hex(cacheEntry.Text); + File f = new File(cacheDir, key); + if(f.exists()) { + boolean ignored = f.delete(); + } + if(f.createNewFile()) { + try (FileOutputStream out = new FileOutputStream(f)) { + out.write(data); + out.close(); + CacheLock.lock(); + CurrentCacheSize += cacheEntry.ByteSize; + if (specialKey != null && !specialKey.equals("")) { + _Logger.info("adding " + specialKey + " to special cache"); + SpecialCache.put(specialKey, cacheEntry); + } else { + _Logger.info("adding '" + cacheEntry.Text + "' to cache"); + Cache.put(key, cacheEntry); + } + } + finally { + CacheLock.unlock(); + } + } else { + _Logger.severe("because of some reason we cannot create the file: " + f.getAbsolutePath()); + } + } catch (IOException | NoSuchAlgorithmException ex) { + _Logger.severe(ex.getClass().getName() + " occurred: " + ex.getMessage()); + for (StackTraceElement el : ex.getStackTrace()) { + _Logger.warning("at: " + 
el.toString()); + } + } + + } + + public void setCacheSize(float newCacheSizeInGB) { + MaxCacheSizeInGB = newCacheSizeInGB; + MaxCacheSizeInB = (long)newCacheSizeInGB * 1024 * 1024 * 1024; + if(MaxCacheSizeInB > CurrentCacheSize) + shrinkCacheToSize(MaxCacheSizeInB); + } + + private void shrinkCacheToSize(long shrinkToSizeInB) { + _Logger.info("shrinking cache to " + shrinkToSizeInB + " bytes, currently: " + CurrentCacheSize); + try { + CacheLock.lock(); + List entries = new ArrayList<>(); + for (CacheEntry entry : Cache.values()) { + entries.add(entry.clone()); + } + entries.sort(Comparator.comparing(cacheEntry -> cacheEntry.LastUsed)); + long newCacheSize = 0; + Map newCache = new HashMap<>(); + File cacheDir = Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(); + for (CacheEntry entry : entries) { + String entryId = Mimic3TTSEngineWrapperApp.getSha256Hex(entry.Text); + if (newCacheSize < shrinkToSizeInB) { + newCache.put(entryId, entry); + newCacheSize += entry.ByteSize; + } else { + File f = new File(cacheDir, entryId); + if (f.exists()) { + boolean ignored = f.delete(); + } + } + } + Cache = newCache; + } + catch (NoSuchAlgorithmException ignored) { } + finally { + CacheLock.unlock(); + } + } + + private void clearSpecialCache() { + _Logger.info("clearing special cache"); + try { + CacheLock.lock(); + File cacheDir = Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(); + for (Map.Entry set: SpecialCache.entrySet()) { + File f = new File(cacheDir, set.getKey()); + if (f.exists()) { + boolean ignored = f.delete(); + } + } + SpecialCache.clear(); + } + finally { + CacheLock.unlock(); + } + } + + private void clearCache() { + _Logger.info("clearing cache"); + try { + CacheLock.lock(); + File cacheDir = Mimic3TTSEngineWrapperApp.getStorageContext().getCacheDir(); + for (Map.Entry set: Cache.entrySet()) { + File f = new File(cacheDir, set.getKey()); + if (f.exists()) { + boolean ignored = f.delete(); + } + } + Cache.clear(); + } + finally { + 
CacheLock.unlock(); + } + } + + public void triggerLoadVoices() { + FetchVoices = true; + } + + public void clearCache(boolean clearSpecialCacheToo) { + if(clearSpecialCacheToo) + clearSpecialCache(); + clearCache(); + } + + public void addVoicesLoadedListener(OnVoicesLoadedListener listener) { + OnVoicesLoadedListeners.add(listener); + } + + public void removeVoicesLoadedListener(OnVoicesLoadedListener listener) { + OnVoicesLoadedListeners.remove(listener); + } + + public static void addLoadedListener(OnLoadedListener listener) { + s_OnLoadedListeners.add(listener); + } + + public static void removeLoadedListener(OnLoadedListener listener) { + s_OnLoadedListeners.remove(listener); + } + + public void addErrorListener(OnErrorListener listener) { + OnErrorListeners.add(listener); + } + + public void removeErrorListener(OnErrorListener listener) { + OnErrorListeners.remove(listener); + } +} diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/Mimic3TTSEngineWrapperApp.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/Mimic3TTSEngineWrapperApp.java new file mode 100644 index 0000000..6a221a1 --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/Mimic3TTSEngineWrapperApp.java @@ -0,0 +1,49 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import android.app.Application; +import android.content.Context; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +public class Mimic3TTSEngineWrapperApp extends Application { + private static Context storageContext; + public static final String PREF_LANGUAGE = "selected_language"; + public static final String PREF_VOICE = "selected_voice"; + public static final String PREF_SPEAKER = "selected_speaker"; + public static final String PREF_SPEED = "speech_speed"; + public static final String PREF_SERVER_ADDRESS = "server_address"; + public static final String PREF_AUDIO_VOLATILITY = 
"audio_volatility"; + public static final String PREF_PHONEME_VOLATILITY = "phoneme_volatility"; + public static final String PREF_CACHE_ACTIVATE = "cache_activate"; + public static final String PREF_CACHE_SIZE = "cache_size"; + public static final String PREF_CACHE_CLEAR = "cache_clear"; + + public static String getSha256Hex(String text) throws NoSuchAlgorithmException { + final MessageDigest digest = MessageDigest.getInstance("SHA-256"); + final byte[] hashBytes = digest.digest(text.getBytes(StandardCharsets.UTF_8)); + return bytesToHex(hashBytes); + } + + public static String bytesToHex(byte[] hash) { + StringBuilder hexString = new StringBuilder(2 * hash.length); + for (int i = 0; i < hash.length; i++) { + String hex = Integer.toHexString(0xff & hash[i]); + if(hex.length() == 1) { + hexString.append('0'); + } + hexString.append(hex); + } + return hexString.toString(); + } + + public void onCreate() { + super.onCreate(); + Mimic3TTSEngineWrapperApp.storageContext = getApplicationContext(); + } + + public static Context getStorageContext() { + return Mimic3TTSEngineWrapperApp.storageContext; + } +} diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/MimicVoice.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/MimicVoice.java new file mode 100644 index 0000000..b0f3cd4 --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/MimicVoice.java @@ -0,0 +1,32 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import androidx.annotation.NonNull; + +public class MimicVoice implements Cloneable { + public String key; + public String name; + public String language; + public String description; + public String location; + public String[] speakers; + public String[] aliases; + public String[] version; + public String sample_text; + + @NonNull + @Override + public MimicVoice clone() { + MimicVoice copy = new MimicVoice(); + copy.key = key; + copy.name = name; + copy.language = language; + copy.description = description; 
+ copy.location = location; + copy.speakers = speakers; + copy.aliases = aliases; + copy.version = version; + copy.sample_text = sample_text; + + return copy; + } +} diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SettingsActivity.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SettingsActivity.java new file mode 100644 index 0000000..27dc0c9 --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SettingsActivity.java @@ -0,0 +1,59 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import androidx.annotation.NonNull; +import androidx.appcompat.app.AppCompatActivity; +import androidx.preference.Preference; +import androidx.preference.PreferenceManager; + +import android.app.AlertDialog; +import android.content.Context; +import android.content.SharedPreferences; +import android.os.Bundle; + +public class SettingsActivity extends AppCompatActivity implements Mimic3TTSEngineWeb.OnErrorListener, Preference.OnPreferenceChangeListener { + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_settings); + getSupportFragmentManager().beginTransaction() + .setReorderingAllowed(true) + .add(R.id.settingsView, SettingsFragment.class, null) + .commit(); + if(Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.addErrorListener(this); + } + } + + @Override + protected void onDestroy() { + super.onDestroy(); + if(Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.removeErrorListener(this); + } + } + + @Override + public void onError(String error) { + Context settings = this; + runOnUiThread(() -> new AlertDialog.Builder(settings) + .setTitle(R.string.tts_server_error) + .setMessage(error) + .setPositiveButton(R.string.ok, null) + .show()); + + } + + @Override + public boolean onPreferenceChange(@NonNull Preference preference, Object newValue) { + 
if(preference.getKey().equals(Mimic3TTSEngineWrapperApp.PREF_CACHE_SIZE)) { + SharedPreferences preferences = PreferenceManager.getDefaultSharedPreferences(Mimic3TTSEngineWrapperApp.getStorageContext()); + String sizeString = preferences.getString(Mimic3TTSEngineWrapperApp.PREF_CACHE_SIZE, "2"); + float size = Float.parseFloat(sizeString); + if(Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.setCacheSize(size); + } + } + return true; + } +} \ No newline at end of file diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SettingsFragment.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SettingsFragment.java new file mode 100644 index 0000000..1ce1bfa --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SettingsFragment.java @@ -0,0 +1,38 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import android.app.AlertDialog; +import android.os.Bundle; + +import androidx.annotation.NonNull; +import androidx.preference.Preference; +import androidx.preference.PreferenceFragmentCompat; + +public class SettingsFragment extends PreferenceFragmentCompat implements Preference.OnPreferenceClickListener { + + @Override + public void onCreatePreferences(Bundle savedInstanceState, String rootKey) { + setPreferencesFromResource(R.xml.root_preferences, rootKey); + Preference cacheClear = findPreference("cache_clear"); + if(cacheClear != null) + cacheClear.setOnPreferenceClickListener(this); + } + + @Override + public boolean onPreferenceClick(@NonNull Preference preference) { + if(preference.getKey().equals("cache_clear")) { + if(Mimic3TTSEngineWeb.s_RunningService != null) { + Mimic3TTSEngineWeb.s_RunningService.clearCache(false); + new AlertDialog.Builder(getContext()) + .setMessage(R.string.cache_reset_successful) + .setPositiveButton(R.string.ok, null) + .show(); + } else { + new AlertDialog.Builder(getContext()) + .setMessage(R.string.cache_reset_failed) + 
.setPositiveButton(R.string.ok, null) + .show(); + } + } + return false; + } +} \ No newline at end of file diff --git a/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SynthesisListener.java b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SynthesisListener.java new file mode 100644 index 0000000..b9f17e3 --- /dev/null +++ b/app/src/main/java/de/bentigorlich/mimic3ttsenginewrapper/SynthesisListener.java @@ -0,0 +1,111 @@ +package de.bentigorlich.mimic3ttsenginewrapper; + +import android.media.AudioAttributes; +import android.media.AudioFormat; +import android.media.AudioManager; +import android.media.AudioTrack; +import android.speech.tts.SynthesisCallback; +import android.speech.tts.TextToSpeech; + +import java.io.ByteArrayOutputStream; +import java.util.logging.LogManager; +import java.util.logging.Logger; + +public class SynthesisListener implements SynthesisCallback { + + boolean Started = false; + boolean Finished = false; + final boolean PlayOnFinish; + int SampleRate; + AudioFormat Format; + int ChannelCount; + ByteArrayOutputStream AudioBuffer = new ByteArrayOutputStream(); + Logger _Logger; + + public SynthesisListener(boolean playOnFinish) { + _Logger = Logger.getLogger(this.getClass().toString()); + LogManager.getLogManager().addLogger(_Logger); + PlayOnFinish = playOnFinish; + } + + @Override + public int getMaxBufferSize() { + return Integer.MAX_VALUE; + } + + @Override + public int start(int sampleRate, int audioFormat, int channelCount) { + _Logger.info("starting synthesis with sampleRate: " + sampleRate + " with channel: " + channelCount + " in format: " + audioFormat); + SampleRate = sampleRate; + Format = new AudioFormat.Builder() + .setSampleRate(sampleRate) + .setEncoding(audioFormat) + .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) + .build(); + ChannelCount = channelCount; + Started = true; + return TextToSpeech.SUCCESS; + } + + @Override + public int audioAvailable(byte[] buffer, int offset, int length) { + 
_Logger.info("got some bytes (" + length + ")"); + AudioBuffer.write(buffer, offset, length); + return TextToSpeech.SUCCESS; + } + + @Override + public int done() { + if(!PlayOnFinish) { + _Logger.info("Synthesis done"); + return 0; + } + + _Logger.info("Synthesis done, building track"); + Finished = true; + AudioAttributes attributes = new AudioAttributes.Builder() + .build(); + AudioTrack t = new AudioTrack(attributes, Format, AudioBuffer.size(), AudioTrack.MODE_STATIC, AudioManager.AUDIO_SESSION_ID_GENERATE); + int trackError = t.write(AudioBuffer.toByteArray(), 0, AudioBuffer.size()); + if(trackError >= 0) { + _Logger.info("playing track"); + t.play(); + } else { + switch (trackError) { + case AudioTrack.ERROR_BAD_VALUE: + _Logger.severe("AudioTrack.ERROR_BAD_VALUE"); + break; + case AudioTrack.ERROR_DEAD_OBJECT: + _Logger.severe("AudioTrack.ERROR_DEAD_OBJECT"); + break; + case AudioTrack.ERROR_INVALID_OPERATION: + _Logger.severe("AudioTrack.ERROR_INVALID_OPERATION"); + break; + case AudioTrack.ERROR: + _Logger.severe("AudioTrack.ERROR"); + break; + } + } + return 0; + } + + @Override + public void error() { + Finished = true; + } + + @Override + public void error(int i) { + Finished = true; + } + + @Override + public boolean hasStarted() { + return Started; + } + + @Override + public boolean hasFinished() { + return Finished; + } +} diff --git a/app/src/main/python/tts.py b/app/src/main/python/tts.py new file mode 100644 index 0000000..d960700 --- /dev/null +++ b/app/src/main/python/tts.py @@ -0,0 +1,180 @@ +import argparse +import io +import logging +import threading +import typing +import wave +from queue import Queue +from mimic3_tts.__main__ import CommandLineInterfaceState, process_line, shutdown_tts, OutputNaming +from opentts_abc import Voice + +_LOGGER = logging.getLogger() + + +def init(args: list[str]): + parser = argparse.ArgumentParser(prog="mimic3wrapper", description="Wrapper for Mimic 3 command-line interface") + parser.add_argument("text", 
def main(state: CommandLineInterfaceState):
    """Run the request described by *state* and return its result.

    Returns the list of available voices when ``--voices`` was requested or
    no text was supplied; otherwise returns whatever ``process_lines``
    produces (WAV bytes, or ``None`` when no audio was synthesized).
    """
    # Configure logging first so messages emitted during start-up are kept.
    logging.basicConfig(level=logging.DEBUG)
    _LOGGER.setLevel(logging.DEBUG)

    initialize_args(state)
    initialize_tts(state)

    try:
        if state.args.voices or not state.texts:
            # initialize_tts starts the result worker whenever --voices is
            # absent; stop it here, because process_lines will not run.
            _stop_result_thread(state)
            return list(state.tts.get_voices())
        return process_lines(state)
    finally:
        shutdown_tts(state)


def process_lines(state: CommandLineInterfaceState):
    """Synthesize every non-blank line of ``state.texts``.

    Returns the combined audio as the bytes of a WAV file, or ``None`` when
    no audio was collected.
    """
    assert state.texts is not None
    try:
        for line in state.texts:
            line = line.strip()
            if not line:
                continue

            process_line(line, state, line_id="", line_voice=None)

    except KeyboardInterrupt:
        if state.result_queue is not None:
            from queue import Empty

            # Drain the pending results. get_nowait() avoids blocking forever
            # if the worker takes the last item between a check and a get.
            try:
                while True:
                    state.result_queue.get_nowait()
            except Empty:
                pass
    finally:
        # Wait for the result worker to finish
        _stop_result_thread(state)

    # -------------------------------------------------------------------------

    if not state.all_audio:
        return None

    _LOGGER.debug("writing byte array with %d bytes", len(state.all_audio))
    with io.BytesIO() as wav_io:
        with wave.open(wav_io, "wb") as wav_file:
            wav_file.setframerate(state.sample_rate_hz)
            wav_file.setsampwidth(state.sample_width_bytes)
            wav_file.setnchannels(state.num_channels)
            wav_file.writeframes(state.all_audio)

        # Must be read before the BytesIO is closed
        return wav_io.getvalue()


def _stop_result_thread(state):
    """Send the ``None`` sentinel to the result worker and wait for it to exit."""
    worker = getattr(state, "result_thread", None)
    if worker is None or not worker.is_alive():
        # Nothing is consuming the queue, so there is nothing to stop
        return

    pending = getattr(state, "result_queue", None)
    if pending is not None:
        pending.put(None)

    worker.join()


def initialize_tts(state: CommandLineInterfaceState):
    """Create Mimic 3 TTS from command-line arguments"""
    from mimic3_tts import Mimic3Settings, Mimic3TextToSpeechSystem  # noqa: F811

    args = state.args

    # Local TTS
    state.tts = Mimic3TextToSpeechSystem(
        Mimic3Settings(
            length_scale=args.length_scale,
            noise_scale=args.noise_scale,
            noise_w=args.noise_w,
            voices_directories=args.voices_dir,
            use_cuda=False,
            use_deterministic_compute=args.deterministic,
        )
    )

    if args.voices:
        # Only listing voices; no synthesis worker is needed
        return

    # Set default voice and speaker
    state.tts.voice = args.voice
    state.tts.speaker = args.speaker

    # The size may arrive as a string from the command line; Queue needs an int
    state.result_queue = Queue(maxsize=int(args.result_queue_size))

    state.result_thread = threading.Thread(
        target=process_result, daemon=True, args=(state,)
    )
    state.result_thread.start()


def initialize_args(state: CommandLineInterfaceState):
    """Initialize CLI state from command-line arguments"""
    args = state.args

    if args.ssml:
        # Avoid text mangling when using SSML
        args.output_naming = OutputNaming.TIME

    if args.text:
        # Text is only taken from the arguments
        state.texts = args.text

    if (not args.speaker) and args.voice and ("#" in args.voice):
        # Split "voice#speaker" apart
        args.voice, args.speaker = args.voice.split("#", maxsplit=1)

    if args.deterministic:
        # Disable noise
        args.noise_scale = 0.0
        args.noise_w = 0.0


def process_result(state: CommandLineInterfaceState):
    """Consume queued synthesis results until the ``None`` sentinel arrives.

    Audio results are appended to ``state.all_audio``; mark results are
    printed to ``state.mark_writer`` when one is set. Errors are logged and
    never propagate out of the worker.
    """
    try:
        from mimic3_tts import AudioResult, MarkResult

        assert state.result_queue is not None
        while True:
            result_todo = state.result_queue.get()
            if result_todo is None:
                # Sentinel: no more results
                break

            try:
                result = result_todo.result

                if isinstance(result, AudioResult):
                    # Combine all audio; it is returned as one WAV at the end
                    state.all_audio += result.audio_bytes
                    state.sample_rate_hz = result.sample_rate_hz
                    state.sample_width_bytes = result.sample_width_bytes
                    state.num_channels = result.num_channels

                elif isinstance(result, MarkResult):
                    if state.mark_writer:
                        print(result.name, file=state.mark_writer)

            except Exception:
                # Keep the worker alive for the remaining results
                _LOGGER.exception("Error processing result")
    except Exception:
        _LOGGER.exception("process_result")