From da6ff8d5bbe87acae943fbfae508bd8735ad5896 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 26 Apr 2024 17:51:16 +0800 Subject: [PATCH] fix kws --- .github/workflows/apk-kws.yaml | 95 +++++++++++++++++-- .github/workflows/run-java-test.yaml | 8 ++ java-api-examples/.gitignore | 3 +- java-api-examples/README.md | 6 ++ .../SpokenLanguageIdentificationWhisper.java | 61 ++++++++++++ ...-spoken-language-identification-whisper.sh | 59 ++++++++++++ sherpa-onnx/java-api/Makefile | 4 + .../onnx/SpokenLanguageIdentification.java | 17 +++- 8 files changed, 241 insertions(+), 12 deletions(-) create mode 100644 java-api-examples/SpokenLanguageIdentificationWhisper.java create mode 100755 java-api-examples/run-spoken-language-identification-whisper.sh diff --git a/.github/workflows/apk-kws.yaml b/.github/workflows/apk-kws.yaml index 7c85e6afa..e5be8f7f1 100644 --- a/.github/workflows/apk-kws.yaml +++ b/.github/workflows/apk-kws.yaml @@ -44,6 +44,23 @@ jobs: echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" ls -lh ${ANDROID_NDK_LATEST_HOME} + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + - name: build APK shell: bash run: | @@ -59,13 +76,77 @@ jobs: run: | ls -lh ./apks/ - - uses: actions/upload-artifact@v4 + + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK with: - path: ./apks/*.apk + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + 
BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} - - name: Release APK - uses: svenstaro/upload-release-action@v2 + - name: Display APK after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 with: - file_glob: true - file: apks/*.apk - overwrite: true + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + mkdir -p kws + cp -v ../apks/*.apk ./kws/ + git status + git lfs track "*.apk" + git add . 
+ git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index 3a3ae9038..37719b61d 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -106,6 +106,14 @@ jobs: make -j4 ls -lh lib + - name: Run java test (Spoken language identification) + shell: bash + run: | + cd ./java-api-examples + ./run-spoken-language-identification-whisper.sh + # Delete model files to save space + rm -rf sherpa-onnx-whisper-* + - name: Run java test (Streaming ASR) shell: bash run: | diff --git a/java-api-examples/.gitignore b/java-api-examples/.gitignore index 6091eb469..91c35d7ae 100644 --- a/java-api-examples/.gitignore +++ b/java-api-examples/.gitignore @@ -1,4 +1,3 @@ lib hs_err* -!run-streaming*.sh -!run-non-streaming*.sh +!run-*.sh diff --git a/java-api-examples/README.md b/java-api-examples/README.md index c653dbb4b..e40de7b63 100755 --- a/java-api-examples/README.md +++ b/java-api-examples/README.md @@ -29,3 +29,9 @@ This directory contains examples for the JAVA API of sherpa-onnx. ./run-non-streaming-tts-coqui-de.sh ./run-non-streaming-tts-vits-zh.sh ``` + +## Spoken language identification + +```bash +./run-spoken-language-identification-whisper.sh +``` diff --git a/java-api-examples/SpokenLanguageIdentificationWhisper.java b/java-api-examples/SpokenLanguageIdentificationWhisper.java new file mode 100644 index 000000000..f2e690b88 --- /dev/null +++ b/java-api-examples/SpokenLanguageIdentificationWhisper.java @@ -0,0 +1,61 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use a multilingual whisper model for +// spoken language identification. +// +// Note that it needs a multilingual whisper model. For instance, +// tiny works, but tiny.en doesn't. 
+import com.k2fsa.sherpa.onnx.*; + +public class SpokenLanguageIdentificationWhisper { + public static void main(String[] args) { + // please download model and test files from + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models + String encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"; + String decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"; + + String[] testFiles = + new String[] { + "./spoken-language-identification-test-wavs/en-english.wav", + "./spoken-language-identification-test-wavs/de-german.wav", + "./spoken-language-identification-test-wavs/zh-chinese.wav", + "./spoken-language-identification-test-wavs/es-spanish.wav", + "./spoken-language-identification-test-wavs/fa-persian.wav", + "./spoken-language-identification-test-wavs/ko-korean.wav", + "./spoken-language-identification-test-wavs/ja-japanese.wav", + "./spoken-language-identification-test-wavs/ru-russian.wav", + "./spoken-language-identification-test-wavs/uk-ukrainian.wav", + }; + + SpokenLanguageIdentificationWhisperConfig whisper = + SpokenLanguageIdentificationWhisperConfig.builder() + .setEncoder(encoder) + .setDecoder(decoder) + .build(); + + SpokenLanguageIdentificationConfig config = + SpokenLanguageIdentificationConfig.builder() + .setWhisper(whisper) + .setNumThreads(1) + .setDebug(true) + .build(); + + SpokenLanguageIdentification slid = new SpokenLanguageIdentification(config); + for (String filename : testFiles) { + WaveReader reader = new WaveReader(filename); + + OfflineStream stream = slid.createStream(); + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); + + String lang = slid.compute(stream); + System.out.println("---"); + System.out.printf("filename: %s\n", filename); + System.out.printf("lang: %s\n", lang); + + stream.release(); + } + System.out.println("---"); + + slid.release(); + } +} diff --git a/java-api-examples/run-spoken-language-identification-whisper.sh 
b/java-api-examples/run-spoken-language-identification-whisper.sh new file mode 100755 index 000000000..485232128 --- /dev/null +++ b/java-api-examples/run-spoken-language-identification-whisper.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib +fi + +# Note that it needs a multilingual whisper model. so, for example, tiny works while tiny.en does not work +# https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 +if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 + tar xvf sherpa-onnx-whisper-tiny.tar.bz2 + rm sherpa-onnx-whisper-tiny.tar.bz2 +fi + +if [ ! 
-f ./spoken-language-identification-test-wavs/en-english.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2 + tar xvf spoken-language-identification-test-wavs.tar.bz2 + rm spoken-language-identification-test-wavs.tar.bz2 +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + ./SpokenLanguageIdentificationWhisper.java diff --git a/sherpa-onnx/java-api/Makefile b/sherpa-onnx/java-api/Makefile index 2e02f4c4b..67cedde48 100644 --- a/sherpa-onnx/java-api/Makefile +++ b/sherpa-onnx/java-api/Makefile @@ -36,6 +36,10 @@ java_files += OfflineTtsConfig.java java_files += GeneratedAudio.java java_files += OfflineTts.java +java_files += SpokenLanguageIdentificationWhisperConfig.java +java_files += SpokenLanguageIdentificationConfig.java +java_files += SpokenLanguageIdentification.java + class_files := $(java_files:%.java=%.class) java_files := $(addprefix src/$(package_dir)/,$(java_files)) diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java index e761703d0..379f38530 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java @@ -2,21 +2,32 @@ package com.k2fsa.sherpa.onnx; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + public class SpokenLanguageIdentification { static { System.loadLibrary("sherpa-onnx-jni"); } + private final Map<String, String> localeMap; private long ptr = 0; // this is the asr engine ptrss - // private final localeMap - public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) { ptr = newFromFile(config); + + String[] languages = Locale.getISOLanguages(); + localeMap = new HashMap<String, String>(languages.length); + for (String language : 
languages) { + Locale locale = new Locale(language); + localeMap.put(language, locale.getDisplayName()); + } } public String compute(OfflineStream stream) { - return compute(ptr, stream.getPtr()); + String lang = compute(ptr, stream.getPtr()); + return localeMap.getOrDefault(lang, lang); } public OfflineStream createStream() {