diff --git a/.github/scripts/test-offline-ctc.sh b/.github/scripts/test-offline-ctc.sh index a6d16d97d..ba2957dc3 100755 --- a/.github/scripts/test-offline-ctc.sh +++ b/.github/scripts/test-offline-ctc.sh @@ -13,14 +13,111 @@ echo "PATH: $PATH" which $EXE +log "-----------------------------------------------------------------" +log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)" +log "-----------------------------------------------------------------" + +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 +name=$(basename $url) +curl -SL -O $url +tar xvf $name +rm $name +repo=$(basename -s .tar.bz2 $name) +ls -lh $repo + +log "test $repo" +test_wavs=( +de-german.wav +es-spanish.wav +hr-croatian.wav +po-polish.wav +uk-ukrainian.wav +en-english.wav +fr-french.wav +it-italian.wav +ru-russian.wav +) +for w in ${test_wavs[@]}; do + time $EXE \ + --tokens=$repo/tokens.txt \ + --nemo-ctc-model=$repo/model.onnx \ + --debug=1 \ + $repo/test_wavs/$w +done + +rm -rf $repo + +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2 +name=$(basename $url) +curl -SL -O $url +tar xvf $name +rm $name +repo=$(basename -s .tar.bz2 $name) +ls -lh $repo + +log "Test $repo" + +time $EXE \ + --tokens=$repo/tokens.txt \ + --nemo-ctc-model=$repo/model.onnx \ + --debug=1 \ + $repo/test_wavs/en-english.wav + +rm -rf $repo + +url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2 +name=$(basename $url) +curl -SL -O $url +tar xvf $name +rm $name +repo=$(basename -s .tar.bz2 $name) +ls -lh $repo + +log "test $repo" + +time $EXE \ + --tokens=$repo/tokens.txt \ + --nemo-ctc-model=$repo/model.onnx \ + --debug=1 \ + $repo/test_wavs/es-spanish.wav + +rm -rf $repo + 
+url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2 +name=$(basename $url) +curl -SL -O $url +tar xvf $name +rm $name +repo=$(basename -s .tar.bz2 $name) +ls -lh $repo + +log "Test $repo" + +test_wavs=( +en-english.wav +de-german.wav +fr-french.wav +es-spanish.wav +) + +for w in ${test_wavs[@]}; do + time $EXE \ + --tokens=$repo/tokens.txt \ + --nemo-ctc-model=$repo/model.onnx \ + --debug=1 \ + $repo/test_wavs/$w +done + +rm -rf $repo + log "------------------------------------------------------------" log "Run Wenet models" log "------------------------------------------------------------" wenet_models=( sherpa-onnx-zh-wenet-aishell -sherpa-onnx-zh-wenet-aishell2 +# sherpa-onnx-zh-wenet-aishell2 # sherpa-onnx-zh-wenet-wenetspeech -sherpa-onnx-zh-wenet-multi-cn +# sherpa-onnx-zh-wenet-multi-cn sherpa-onnx-en-wenet-librispeech # sherpa-onnx-en-wenet-gigaspeech ) diff --git a/.github/scripts/test-spoken-language-identification.sh b/.github/scripts/test-spoken-language-identification.sh index 4c15eff74..75d1364d4 100755 --- a/.github/scripts/test-spoken-language-identification.sh +++ b/.github/scripts/test-spoken-language-identification.sh @@ -62,6 +62,11 @@ for wav in ${waves[@]}; do ls -lh *.wav done +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2 +tar xvf spoken-language-identification-test-wavs.tar.bz2 +rm spoken-language-identification-test-wavs.tar.bz2 +data=spoken-language-identification-test-wavs + for name in ${names[@]}; do log "------------------------------------------------------------" log "Run $name" @@ -85,14 +90,14 @@ for name in ${names[@]}; do time $EXE \ --whisper-encoder=$repo/${name}-encoder.onnx \ --whisper-decoder=$repo/${name}-decoder.onnx \ - $wav + $data/$wav log "test int8 onnx" time $EXE \ --whisper-encoder=$repo/${name}-encoder.int8.onnx \ 
--whisper-decoder=$repo/${name}-decoder.int8.onnx \ - $wav + $data/$wav done rm -rf $repo done diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index 260b99af5..c1e8a69a8 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -128,13 +128,13 @@ jobs: name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }} path: install/* - - name: Test offline punctuation + - name: Test spoken language identification (C++ API) shell: bash run: | export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx-offline-punctuation + export EXE=sherpa-onnx-offline-language-identification - .github/scripts/test-offline-punctuation.sh + .github/scripts/test-spoken-language-identification.sh - name: Test C API shell: bash @@ -147,13 +147,13 @@ jobs: .github/scripts/test-c-api.sh - - name: Test Audio tagging + - name: Test offline CTC shell: bash run: | export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx-offline-audio-tagging + export EXE=sherpa-onnx-offline - .github/scripts/test-audio-tagging.sh + .github/scripts/test-offline-ctc.sh - name: Test online CTC shell: bash @@ -163,14 +163,21 @@ jobs: .github/scripts/test-online-ctc.sh + - name: Test offline punctuation + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-punctuation + + .github/scripts/test-offline-punctuation.sh - - name: Test spoken language identification (C++ API) + - name: Test Audio tagging shell: bash run: | export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx-offline-language-identification + export EXE=sherpa-onnx-offline-audio-tagging - .github/scripts/test-spoken-language-identification.sh + .github/scripts/test-audio-tagging.sh - name: Test transducer kws shell: bash @@ -180,7 +187,6 @@ jobs: .github/scripts/test-kws.sh - - name: Test offline Whisper if: matrix.build_type != 'Debug' shell: bash @@ -192,14 +198,6 @@ jobs: .github/scripts/test-offline-whisper.sh - - 
name: Test offline CTC - shell: bash - run: | - export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx-offline - - .github/scripts/test-offline-ctc.sh - - name: Test offline TTS if: matrix.with_tts == 'ON' shell: bash diff --git a/.github/workflows/macos.yaml b/.github/workflows/macos.yaml index e70ff11e1..616877126 100644 --- a/.github/workflows/macos.yaml +++ b/.github/workflows/macos.yaml @@ -107,6 +107,14 @@ jobs: otool -L build/bin/sherpa-onnx otool -l build/bin/sherpa-onnx + - name: Test online CTC + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx + + .github/scripts/test-online-ctc.sh + - name: Test offline punctuation shell: bash run: | @@ -150,14 +158,6 @@ jobs: .github/scripts/test-kws.sh - - name: Test online CTC - shell: bash - run: | - export PATH=$PWD/build/bin:$PATH - export EXE=sherpa-onnx - - .github/scripts/test-online-ctc.sh - - name: Test offline TTS if: matrix.with_tts == 'ON' shell: bash diff --git a/sherpa-onnx/csrc/offline-ctc-model.cc b/sherpa-onnx/csrc/offline-ctc-model.cc index 0bfa56add..e22027e71 100644 --- a/sherpa-onnx/csrc/offline-ctc-model.cc +++ b/sherpa-onnx/csrc/offline-ctc-model.cc @@ -20,6 +20,7 @@ namespace { enum class ModelType { kEncDecCTCModelBPE, + kEncDecHybridRNNTCTCBPEModel, kTdnn, kZipformerCtc, kWenetCtc, @@ -55,7 +56,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length, "No model_type in the metadata!\n" "If you are using models from NeMo, please refer to\n" "https://huggingface.co/csukuangfj/" - "sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py" + "sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py\n" + "or " + "https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/nemo/" + "fast-conformer-hybrid-transducer-ctc\n" "If you are using models from WeNet, please refer to\n" "https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/" "run.sh\n" @@ -66,6 +70,8 @@ static ModelType GetModelType(char *model_data, 
size_t model_data_length, if (model_type.get() == std::string("EncDecCTCModelBPE")) { return ModelType::kEncDecCTCModelBPE; + } else if (model_type.get() == std::string("EncDecHybridRNNTCTCBPEModel")) { + return ModelType::kEncDecHybridRNNTCTCBPEModel; } else if (model_type.get() == std::string("tdnn")) { return ModelType::kTdnn; } else if (model_type.get() == std::string("zipformer2_ctc")) { @@ -106,6 +112,9 @@ std::unique_ptr OfflineCtcModel::Create( case ModelType::kEncDecCTCModelBPE: return std::make_unique(config); break; + case ModelType::kEncDecHybridRNNTCTCBPEModel: + return std::make_unique(config); + break; case ModelType::kTdnn: return std::make_unique(config); break; @@ -153,6 +162,9 @@ std::unique_ptr OfflineCtcModel::Create( case ModelType::kEncDecCTCModelBPE: return std::make_unique(mgr, config); break; + case ModelType::kEncDecHybridRNNTCTCBPEModel: + return std::make_unique(mgr, config); + break; case ModelType::kTdnn: return std::make_unique(mgr, config); break; diff --git a/sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h b/sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h index 9cdfe1996..6e1ba5855 100644 --- a/sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h +++ b/sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h @@ -81,6 +81,8 @@ class OfflineNemoEncDecCtcModel : public OfflineCtcModel { std::unique_ptr impl_; }; +using OfflineNemoEncDecHybridRNNTCTCBPEModel = OfflineNemoEncDecCtcModel; + } // namespace sherpa_onnx #endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_ diff --git a/sherpa-onnx/csrc/offline-recognizer-impl.cc b/sherpa-onnx/csrc/offline-recognizer-impl.cc index c89b868e3..cb9246b26 100644 --- a/sherpa-onnx/csrc/offline-recognizer-impl.cc +++ b/sherpa-onnx/csrc/offline-recognizer-impl.cc @@ -122,7 +122,8 @@ std::unique_ptr OfflineRecognizerImpl::Create( return std::make_unique(config); } - if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" || + if (model_type == "EncDecCTCModelBPE" || + model_type == 
"EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" || model_type == "zipformer2_ctc" || model_type == "wenet_ctc") { return std::make_unique(config); } @@ -137,6 +138,7 @@ std::unique_ptr OfflineRecognizerImpl::Create( " - Non-streaming transducer models from icefall\n" " - Non-streaming Paraformer models from FunASR\n" " - EncDecCTCModelBPE models from NeMo\n" + " - EncDecHybridRNNTCTCBPEModel models from NeMo\n" " - Whisper models\n" " - Tdnn models\n" " - Zipformer CTC models\n" @@ -252,7 +254,8 @@ std::unique_ptr OfflineRecognizerImpl::Create( return std::make_unique(mgr, config); } - if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" || + if (model_type == "EncDecCTCModelBPE" || + model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" || model_type == "zipformer2_ctc" || model_type == "wenet_ctc") { return std::make_unique(mgr, config); } @@ -267,6 +270,7 @@ std::unique_ptr OfflineRecognizerImpl::Create( " - Non-streaming transducer models from icefall\n" " - Non-streaming Paraformer models from FunASR\n" " - EncDecCTCModelBPE models from NeMo\n" + " - EncDecHybridRNNTCTCBPEModel models from NeMo\n" " - Whisper models\n" " - Tdnn models\n" " - Zipformer CTC models\n" diff --git a/sherpa-onnx/csrc/symbol-table.cc b/sherpa-onnx/csrc/symbol-table.cc index 1d230bca3..d27249f4b 100644 --- a/sherpa-onnx/csrc/symbol-table.cc +++ b/sherpa-onnx/csrc/symbol-table.cc @@ -67,9 +67,13 @@ void SymbolTable::Init(std::istream &is) { // the following check. // // Note: Only id2sym_ matters as we use it to convert ID to symbols. +#if 0 + // we disable the test here since for some multi-lingual BPE models + // from NeMo, the same symbol can appear multiple times with different IDs. if (sym != " ") { assert(sym2id_.count(sym) == 0); } +#endif assert(id2sym_.count(id) == 0);