Skip to content

Commit

Permalink
Add C++ support for non-streaming NeMo fast conformer hybrid transduc…
Browse files Browse the repository at this point in the history
…er ctc (the ctc branch) (#848)
  • Loading branch information
csukuangfj authored May 9, 2024
1 parent 5ed3ec1 commit 5d8c35e
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 33 deletions.
101 changes: 99 additions & 2 deletions .github/scripts/test-offline-ctc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,111 @@ echo "PATH: $PATH"

which $EXE

log "-----------------------------------------------------------------"
log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
log "-----------------------------------------------------------------"

# Multilingual model (be/de/en/es/fr/hr/it/pl/ru/uk, per the archive name):
# download the archive, unpack it, delete the archive, decode each test wave,
# and finally delete the extracted model directory.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
name=$(basename "$url")
curl -SL -O "$url"
tar xvf "$name"
rm "$name"
# The extracted directory is the archive name without the .tar.bz2 suffix.
repo=$(basename -s .tar.bz2 "$name")
ls -lh "$repo"

log "test $repo"
test_wavs=(
  de-german.wav
  es-spanish.wav
  hr-croatian.wav
  po-polish.wav
  uk-ukrainian.wav
  en-english.wav
  fr-french.wav
  it-italian.wav
  ru-russian.wav
)
# Quote the array expansion so each file name is passed as exactly one word
# and is never subject to word splitting or pathname expansion.
for w in "${test_wavs[@]}"; do
  time $EXE \
    --tokens="$repo/tokens.txt" \
    --nemo-ctc-model="$repo/model.onnx" \
    --debug=1 \
    "$repo/test_wavs/$w"
done

rm -rf "$repo"

# English model (en-24500): download, unpack, decode the bundled English test
# wave, then delete the extracted model directory.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-24500.tar.bz2
name=$(basename "$url")
curl -SL -O "$url"
tar xvf "$name"
rm "$name"
# The extracted directory is the archive name without the .tar.bz2 suffix.
repo=$(basename -s .tar.bz2 "$name")
ls -lh "$repo"

log "Test $repo"

# Expansions are quoted so paths are always passed as single arguments.
time $EXE \
  --tokens="$repo/tokens.txt" \
  --nemo-ctc-model="$repo/model.onnx" \
  --debug=1 \
  "$repo/test_wavs/en-english.wav"

rm -rf "$repo"

# Spanish model (es-1424): download, unpack, decode the bundled Spanish test
# wave, then delete the extracted model directory.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-es-1424.tar.bz2
name=$(basename "$url")
curl -SL -O "$url"
tar xvf "$name"
rm "$name"
# The extracted directory is the archive name without the .tar.bz2 suffix.
repo=$(basename -s .tar.bz2 "$name")
ls -lh "$repo"

log "test $repo"

# Expansions are quoted so paths are always passed as single arguments.
time $EXE \
  --tokens="$repo/tokens.txt" \
  --nemo-ctc-model="$repo/model.onnx" \
  --debug=1 \
  "$repo/test_wavs/es-spanish.wav"

rm -rf "$repo"

# Four-language model (en/de/es/fr, per the archive name): download, unpack,
# decode one test wave per language, then delete the extracted model directory.
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288.tar.bz2
name=$(basename "$url")
curl -SL -O "$url"
tar xvf "$name"
rm "$name"
# The extracted directory is the archive name without the .tar.bz2 suffix.
repo=$(basename -s .tar.bz2 "$name")
ls -lh "$repo"

log "Test $repo"

test_wavs=(
  en-english.wav
  de-german.wav
  fr-french.wav
  es-spanish.wav
)

# Quote the array expansion so each file name is passed as exactly one word
# and is never subject to word splitting or pathname expansion.
for w in "${test_wavs[@]}"; do
  time $EXE \
    --tokens="$repo/tokens.txt" \
    --nemo-ctc-model="$repo/model.onnx" \
    --debug=1 \
    "$repo/test_wavs/$w"
done

rm -rf "$repo"

log "------------------------------------------------------------"
log "Run Wenet models"
log "------------------------------------------------------------"
wenet_models=(
sherpa-onnx-zh-wenet-aishell
sherpa-onnx-zh-wenet-aishell2
# sherpa-onnx-zh-wenet-aishell2
# sherpa-onnx-zh-wenet-wenetspeech
sherpa-onnx-zh-wenet-multi-cn
# sherpa-onnx-zh-wenet-multi-cn
sherpa-onnx-en-wenet-librispeech
# sherpa-onnx-en-wenet-gigaspeech
)
Expand Down
9 changes: 7 additions & 2 deletions .github/scripts/test-spoken-language-identification.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ for wav in ${waves[@]}; do
ls -lh *.wav
done

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
tar xvf spoken-language-identification-test-wavs.tar.bz2
rm spoken-language-identification-test-wavs.tar.bz2
data=spoken-language-identification-test-wavs

for name in ${names[@]}; do
log "------------------------------------------------------------"
log "Run $name"
Expand All @@ -85,14 +90,14 @@ for name in ${names[@]}; do
time $EXE \
--whisper-encoder=$repo/${name}-encoder.onnx \
--whisper-decoder=$repo/${name}-decoder.onnx \
$wav
$data/$wav

log "test int8 onnx"

time $EXE \
--whisper-encoder=$repo/${name}-encoder.int8.onnx \
--whisper-decoder=$repo/${name}-decoder.int8.onnx \
$wav
$data/$wav
done
rm -rf $repo
done
34 changes: 16 additions & 18 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,13 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: install/*

- name: Test offline punctuation
- name: Test spoken language identification (C++ API)
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
export EXE=sherpa-onnx-offline-language-identification
.github/scripts/test-offline-punctuation.sh
.github/scripts/test-spoken-language-identification.sh
- name: Test C API
shell: bash
Expand All @@ -147,13 +147,13 @@ jobs:
.github/scripts/test-c-api.sh
- name: Test Audio tagging
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-audio-tagging
export EXE=sherpa-onnx-offline
.github/scripts/test-audio-tagging.sh
.github/scripts/test-offline-ctc.sh
- name: Test online CTC
shell: bash
Expand All @@ -163,14 +163,21 @@ jobs:
.github/scripts/test-online-ctc.sh
- name: Test offline punctuation
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-punctuation
.github/scripts/test-offline-punctuation.sh
- name: Test spoken language identification (C++ API)
- name: Test Audio tagging
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline-language-identification
export EXE=sherpa-onnx-offline-audio-tagging
.github/scripts/test-spoken-language-identification.sh
.github/scripts/test-audio-tagging.sh
- name: Test transducer kws
shell: bash
Expand All @@ -180,7 +187,6 @@ jobs:
.github/scripts/test-kws.sh
- name: Test offline Whisper
if: matrix.build_type != 'Debug'
shell: bash
Expand All @@ -192,14 +198,6 @@ jobs:
.github/scripts/test-offline-whisper.sh
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test online CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-ctc.sh
- name: Test offline punctuation
shell: bash
run: |
Expand Down Expand Up @@ -150,14 +158,6 @@ jobs:
.github/scripts/test-kws.sh
- name: Test online CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
.github/scripts/test-online-ctc.sh
- name: Test offline TTS
if: matrix.with_tts == 'ON'
shell: bash
Expand Down
14 changes: 13 additions & 1 deletion sherpa-onnx/csrc/offline-ctc-model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace {

enum class ModelType {
kEncDecCTCModelBPE,
kEncDecHybridRNNTCTCBPEModel,
kTdnn,
kZipformerCtc,
kWenetCtc,
Expand Down Expand Up @@ -55,7 +56,10 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,
"No model_type in the metadata!\n"
"If you are using models from NeMo, please refer to\n"
"https://huggingface.co/csukuangfj/"
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py"
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py\n"
"or "
"https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/nemo/"
"fast-conformer-hybrid-transducer-ctc\n"
"If you are using models from WeNet, please refer to\n"
"https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/wenet/"
"run.sh\n"
Expand All @@ -66,6 +70,8 @@ static ModelType GetModelType(char *model_data, size_t model_data_length,

if (model_type.get() == std::string("EncDecCTCModelBPE")) {
return ModelType::kEncDecCTCModelBPE;
} else if (model_type.get() == std::string("EncDecHybridRNNTCTCBPEModel")) {
return ModelType::kEncDecHybridRNNTCTCBPEModel;
} else if (model_type.get() == std::string("tdnn")) {
return ModelType::kTdnn;
} else if (model_type.get() == std::string("zipformer2_ctc")) {
Expand Down Expand Up @@ -106,6 +112,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
case ModelType::kEncDecCTCModelBPE:
return std::make_unique<OfflineNemoEncDecCtcModel>(config);
break;
case ModelType::kEncDecHybridRNNTCTCBPEModel:
return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(config);
break;
case ModelType::kTdnn:
return std::make_unique<OfflineTdnnCtcModel>(config);
break;
Expand Down Expand Up @@ -153,6 +162,9 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
case ModelType::kEncDecCTCModelBPE:
return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
break;
case ModelType::kEncDecHybridRNNTCTCBPEModel:
  // Forward `mgr` exactly as the sibling cases in this overload do. Passing
  // only `config` would pick the constructor used by the non-`mgr` Create()
  // overload and silently ignore the `mgr` argument for this model type.
  return std::make_unique<OfflineNemoEncDecHybridRNNTCTCBPEModel>(mgr, config);
  break;
case ModelType::kTdnn:
return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
break;
Expand Down
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/offline-nemo-enc-dec-ctc-model.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ class OfflineNemoEncDecCtcModel : public OfflineCtcModel {
std::unique_ptr<Impl> impl_;
};

// Models whose metadata model_type is "EncDecHybridRNNTCTCBPEModel" are run
// with the same implementation as OfflineNemoEncDecCtcModel, so the hybrid
// type is only an alias and adds no behavior of its own.
using OfflineNemoEncDecHybridRNNTCTCBPEModel = OfflineNemoEncDecCtcModel;

} // namespace sherpa_onnx

#endif // SHERPA_ONNX_CSRC_OFFLINE_NEMO_ENC_DEC_CTC_MODEL_H_
8 changes: 6 additions & 2 deletions sherpa-onnx/csrc/offline-recognizer-impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
return std::make_unique<OfflineRecognizerParaformerImpl>(config);
}

if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" ||
if (model_type == "EncDecCTCModelBPE" ||
model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
return std::make_unique<OfflineRecognizerCtcImpl>(config);
}
Expand All @@ -137,6 +138,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
" - Non-streaming transducer models from icefall\n"
" - Non-streaming Paraformer models from FunASR\n"
" - EncDecCTCModelBPE models from NeMo\n"
" - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
" - Whisper models\n"
" - Tdnn models\n"
" - Zipformer CTC models\n"
Expand Down Expand Up @@ -252,7 +254,8 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
}

if (model_type == "EncDecCTCModelBPE" || model_type == "tdnn" ||
if (model_type == "EncDecCTCModelBPE" ||
model_type == "EncDecHybridRNNTCTCBPEModel" || model_type == "tdnn" ||
model_type == "zipformer2_ctc" || model_type == "wenet_ctc") {
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
}
Expand All @@ -267,6 +270,7 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
" - Non-streaming transducer models from icefall\n"
" - Non-streaming Paraformer models from FunASR\n"
" - EncDecCTCModelBPE models from NeMo\n"
" - EncDecHybridRNNTCTCBPEModel models from NeMo\n"
" - Whisper models\n"
" - Tdnn models\n"
" - Zipformer CTC models\n"
Expand Down
4 changes: 4 additions & 0 deletions sherpa-onnx/csrc/symbol-table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,13 @@ void SymbolTable::Init(std::istream &is) {
// the following check.
//
// Note: Only id2sym_ matters as we use it to convert ID to symbols.
#if 0
// we disable the test here since for some multi-lingual BPE models
// from NeMo, the same symbol can appear multiple times with different IDs.
if (sym != " ") {
assert(sym2id_.count(sym) == 0);
}
#endif

assert(id2sym_.count(id) == 0);

Expand Down

0 comments on commit 5d8c35e

Please sign in to comment.