From 581eceb4d5a60fd9e29c9c0e30a0197214d2a3e4 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Mon, 1 Jan 2024 12:44:20 +0800
Subject: [PATCH] Build text-to-speech engine APKs (#509)

---
 .github/workflows/apk-tts-engine.yaml         | 121 ++++++++++++++++++
 .github/workflows/apk-tts.yaml                |   2 +-
 CMakeLists.txt                                |   2 +-
 .../sherpa/onnx/tts/engine/GetSampleText.kt   |  91 ++++++++++++-
 scripts/apk/build-apk-tts-engine.sh.in        |  98 ++++++++++++++
 scripts/apk/build-apk-tts.sh.in               |   2 +-
 scripts/apk/generate-tts-apk-script.py        |  27 +++-
 7 files changed, 331 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/apk-tts-engine.yaml
 create mode 100644 scripts/apk/build-apk-tts-engine.sh.in

diff --git a/.github/workflows/apk-tts-engine.yaml b/.github/workflows/apk-tts-engine.yaml
new file mode 100644
index 000000000..552f696b0
--- /dev/null
+++ b/.github/workflows/apk-tts-engine.yaml
@@ -0,0 +1,121 @@
+name: apk-tts-engine
+
+on:
+  push:
+    branches:
+      - apk-tts
+    tags:
+      - '*'
+
+  workflow_dispatch:
+
+concurrency:
+  group: apk-tts-engine-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: write
+
+jobs:
+  apk_tts_engine:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    runs-on: ${{ matrix.os }}
+    name: apk for tts engine ${{ matrix.index }}/${{ matrix.total }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        total: ["40"]
+        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # https://github.com/actions/setup-java
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin' # See 'Supported distributions' for available options
+          java-version: '21'
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ matrix.os }}-android
+
+      - name: Display NDK HOME
+        shell: bash
+        run: |
+          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+          ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          python3 -m pip install --upgrade pip jinja2 iso639-lang
+
+      - name: Generate build script
+        shell: bash
+        run: |
+          cd scripts/apk
+
+          total=${{ matrix.total }}
+          index=${{ matrix.index }}
+
+          ./generate-tts-apk-script.py --total $total --index $index
+
+          chmod +x build-apk-tts-engine.sh
+          mv -v ./build-apk-tts-engine.sh ../..
+
+      - name: build APK for TTS engine
+        shell: bash
+        run: |
+          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+          cmake --version
+
+          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+          ./build-apk-tts-engine.sh
+
+      - name: Display APK for TTS engine
+        shell: bash
+        run: |
+          ls -lh ./apks/
+          du -h -d1 .
+
+      - uses: actions/upload-artifact@v3
+        if: false
+        with:
+          name: tts-engine-apk
+          path: ./apks/*.apk
+
+      - name: Publish to huggingface
+        if: true
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v2
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf huggingface
+            export GIT_LFS_SKIP_SMUDGE=1
+
+            git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+            cd huggingface
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+
+            mkdir -p tts-engine  # must be the directory the cp below writes into
+            cp -v ../apks/*.apk ./tts-engine/
+            git status
+            git lfs track "*.apk"
+            git add .
+            git commit -m "add more tts engine apks"
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
diff --git a/.github/workflows/apk-tts.yaml b/.github/workflows/apk-tts.yaml
index 2180adacb..275a86be2 100644
--- a/.github/workflows/apk-tts.yaml
+++ b/.github/workflows/apk-tts.yaml
@@ -48,7 +48,7 @@ jobs:
       - name: Install Python dependencies
         shell: bash
         run: |
-          python3 -m pip install --upgrade pip jinja2
+          python3 -m pip install --upgrade pip jinja2 iso639-lang
 
       - name: Generate build script
         shell: bash
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6992781fb..833786fbe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 project(sherpa-onnx)
 
-set(SHERPA_ONNX_VERSION "1.9.6")
+set(SHERPA_ONNX_VERSION "1.9.7")
 
 # Disable warning about
 #
diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
index fe788afe0..c6e237a36 100644
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/GetSampleText.kt
@@ -12,11 +12,98 @@ class GetSampleText : Activity() {
         var result = TextToSpeech.LANG_AVAILABLE
         var text: String = ""
         when(TtsEngine.lang) {
+            "ara" -> {
+                text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي"
+            }
+            "cat" -> {
+                text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació"
+            }
+            "ces" -> {
+                text = "Toto je převodník textu na řeč využívající novou generaci kaldi"
+            }
+            "dan" -> {
+                text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi"
+            }
+            "deu" -> {
+                text = "Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet"
+            }
+            "ell" -> {
+                text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς"
+            }
             "eng" -> {
-                text = "This is a text-to-speech engine with next generation Kaldi"
+                text = "This is a text-to-speech engine using next generation Kaldi"
+            }
+            "fin" -> {
+                text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia"
+            }
+            "fra" -> {
+                text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération."
+            }
+            "hun" -> {
+                text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával"
+            }
+            "isl" -> {
+                text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi"
+            }
+            "ita" -> {
+                text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione"
+            }
+            "kat" -> {
+                text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით"
+            }
+            "kaz" -> {
+                text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш"
+            }
+            "ltz" -> {
+                text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi"
+            }
+            "nep" -> {
+                text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो"
+            }
+            "nld" -> {
+                text = "Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie"
+            }
+            "nor" -> {
+                text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi"
+            }
+            "pol" -> {
+                text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji"
+            }
+            "por" -> {
+                text = "Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração"
+            }
+            "ron" -> {
+                text = "Acesta este un motor text to speech care folosește generația următoare de kaldi"
+            }
+            "rus" -> {
+                text = "Это движок преобразования текста в речь, использующий Kaldi следующего поколения."
+            }
+            "slk" -> {
+                text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie"
+            }
+            "spa" -> {
+                text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación."
+            }
+            "srp" -> {
+                text = "Ово је механизам за претварање текста у говор који користи калди следеће генерације"
+            }
+            "swa" -> {
+                text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi"
+            }
+            "swe" -> {
+                text = "Detta är en text till tal-motor som använder nästa generations kaldi"
+            }
+            "tur" -> {
+                text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur"
+            }
+            "ukr" -> {
+                text = "Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління"
+            }
+            "vie" -> {
+                text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo"
             }
             "zho", "cmn" -> {
-                text = "使用新一代 Kaldi 进行语音合成"
+                text = "使用新一代卡尔迪的语音合成引擎"
             }
             else -> {
                 result = TextToSpeech.LANG_NOT_SUPPORTED
diff --git a/scripts/apk/build-apk-tts-engine.sh.in b/scripts/apk/build-apk-tts-engine.sh.in
new file mode 100644
index 000000000..9b0ae1e1a
--- /dev/null
+++ b/scripts/apk/build-apk-tts-engine.sh.in
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+#
+# Auto generated! Please DO NOT EDIT!
+
+# Please set the environment variable ANDROID_NDK
+# before running this script
+
+# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
+# and some other files like the file "build/cmake/android.toolchain.cmake"
+
+set -ex
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
+
+log "Building TTS engine APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
+
+log "====================arm64-v8a================="
+./build-android-arm64-v8a.sh
+log "====================armv7-eabi================"
+./build-android-armv7-eabi.sh
+log "====================x86-64===================="
+./build-android-x86-64.sh
+log "====================x86===================="
+./build-android-x86.sh
+
+mkdir -p apks
+
+{% for tts_model in tts_model_list %}
+pushd ./android/SherpaOnnxTtsEngine/app/src/main/assets/
+model_dir={{ tts_model.model_dir }}
+model_name={{ tts_model.model_name }}
+lang={{ tts_model.lang }}
+lang_iso_639_3={{ tts_model.lang_iso_639_3 }}
+
+wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
+tar xf $model_dir.tar.bz2
+rm $model_dir.tar.bz2
+
+popd
+# Now we are at the project root directory
+
+git checkout .
+pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
+sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
+sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
+sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
+
+{% if tts_model.rule_fsts %}
+  rule_fsts={{ tts_model.rule_fsts }}
+  sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
+{% endif %}
+
+{% if tts_model.data_dir %}
+  data_dir={{ tts_model.data_dir }}
+  sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt
+{% else %}
+  sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt
+{% endif %}
+
+git diff
+popd
+
+for arch in arm64-v8a armeabi-v7a x86_64 x86; do
+  log "------------------------------------------------------------"
+  log "build tts apk for $arch"
+  log "------------------------------------------------------------"
+  src_arch=$arch
+  if [ $arch == "armeabi-v7a" ]; then
+    src_arch=armv7-eabi
+  elif [ $arch == "x86_64" ]; then
+    src_arch=x86-64
+  fi
+
+  ls -lh ./build-android-$src_arch/install/lib/*.so
+
+  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxTtsEngine/app/src/main/jniLibs/$arch/
+
+  pushd ./android/SherpaOnnxTtsEngine
+  ./gradlew build
+  popd
+
+  mv android/SherpaOnnxTtsEngine/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-tts-engine-$model_dir.apk
+  ls -lh apks
+  rm -v ./android/SherpaOnnxTtsEngine/app/src/main/jniLibs/$arch/*.so
+done
+
+rm -rf ./android/SherpaOnnxTtsEngine/app/src/main/assets/$model_dir
+{% endfor %}
+
+git checkout .
+
+ls -lh apks/
diff --git a/scripts/apk/build-apk-tts.sh.in b/scripts/apk/build-apk-tts.sh.in
index 779bb7978..d1745efd4 100644
--- a/scripts/apk/build-apk-tts.sh.in
+++ b/scripts/apk/build-apk-tts.sh.in
@@ -18,7 +18,7 @@ log() {
 
 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
 
-log "Building APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
+log "Building TTS APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
 
 log "====================arm64-v8a================="
 ./build-android-arm64-v8a.sh
diff --git a/scripts/apk/generate-tts-apk-script.py b/scripts/apk/generate-tts-apk-script.py
index b32836c3e..eaedad7c1 100755
--- a/scripts/apk/generate-tts-apk-script.py
+++ b/scripts/apk/generate-tts-apk-script.py
@@ -6,6 +6,9 @@
 
 import jinja2
 
+# pip install iso639-lang
+from iso639 import Lang
+
 
 def get_args():
     parser = argparse.ArgumentParser()
@@ -31,6 +34,12 @@ class TtsModel:
     lang: str = ""  # en, zh, fr, de, etc.
     rule_fsts: Optional[List[str]] = None
     data_dir: Optional[str] = None
+    lang_iso_639_3: str = ""
+
+
+def convert_lang_to_iso_639_3(models: List[TtsModel]):
+    for m in models:
+        m.lang_iso_639_3 = Lang(m.lang).pt3
 
 
 def get_coqui_models() -> List[TtsModel]:
@@ -234,15 +243,12 @@ def main():
     index = args.index
     total = args.total
     assert 0 <= index < total, (index, total)
-    environment = jinja2.Environment()
-    with open("./build-apk-tts.sh.in") as f:
-        s = f.read()
-    template = environment.from_string(s)
     d = dict()
 
     all_model_list = get_vits_models()
     all_model_list += get_piper_models()
     all_model_list += get_coqui_models()
+    convert_lang_to_iso_639_3(all_model_list)
 
     num_models = len(all_model_list)
 
@@ -262,9 +268,16 @@ def main():
         d["tts_model_list"].append(all_model_list[s])
         print(f"{s}/{num_models}")
 
-    s = template.render(**d)
-    with open("./build-apk-tts.sh", "w") as f:
-        print(s, file=f)
+    filename_list = ["./build-apk-tts.sh", "./build-apk-tts-engine.sh"]
+    for filename in filename_list:
+        environment = jinja2.Environment()
+        with open(f"{filename}.in") as f:
+            s = f.read()
+        template = environment.from_string(s)
+
+        s = template.render(**d)
+        with open(filename, "w") as f:
+            print(s, file=f)
 
 
 if __name__ == "__main__":