From ff2bef9e501a4b5ebfec04cbfe8afa2e8bea4b40 Mon Sep 17 00:00:00 2001 From: Yuekai Zhang Date: Wed, 19 Jun 2024 11:10:31 +0800 Subject: [PATCH] update multi-hans whisper-qwen-1.5b results (#1657) --- egs/speech_llm/ASR_LLM/RESULTS.md | 29 +++++++++++++++++---- egs/speechio/ASR/local/normalize_results.py | 7 +++-- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/egs/speech_llm/ASR_LLM/RESULTS.md b/egs/speech_llm/ASR_LLM/RESULTS.md index dc2479054f..05c0ffd277 100644 --- a/egs/speech_llm/ASR_LLM/RESULTS.md +++ b/egs/speech_llm/ASR_LLM/RESULTS.md @@ -2,12 +2,31 @@ ### whisper_llm_zh finetuning results -| Training Dataset | Speech Encoder | LLM | Projector |Comment | CER | -| -------------------------| ----------------|------|--------------------------------------------------|-----|--| -| Aishell1 | whisper-large-v2-aishell1-ft, freeze| Qwen2-1.5B-Instruct, LoRA | Linear, 8x downsample| [yuekai/icefall_asr_aishell_whisper_qwen2_1.5B](https://huggingface.co/yuekai/icefall_asr_aishell_whisper_qwen2_1.5B) | Aishell1 Test 3.62% | - +|Model| Training Dataset | Speech Encoder | LLM | Projector | +|-| -------------------------| ----------------|------|---------------| +|[yuekai/icefall_asr_aishell_whisper_qwen2_1.5B](https://huggingface.co/yuekai/icefall_asr_aishell_whisper_qwen2_1.5B) | Aishell1 | whisper-large-v2-aishell1-ft, freeze| Qwen2-1.5B-Instruct, LoRA | Linear, 8x downsample| +| [yuekai/icefall_asr_multi-hans_whisper_qwen2_1.5B](https://huggingface.co/yuekai/icefall_asr_multi-hans_whisper_qwen2_1.5B) |Multi-hans-zh | whisper-large-v2-multi-hans-ft, freeze| Qwen2-1.5B-Instruct, LoRA | Linear, 8x downsample| +CER Details: +| Model | [yuekai/icefall_asr_aishell_whisper_qwen2_1.5B](https://huggingface.co/yuekai/icefall_asr_aishell_whisper_qwen2_1.5B) | [yuekai/icefall_asr_multi-hans_whisper_qwen2_1.5B](https://huggingface.co/yuekai/icefall_asr_multi-hans_whisper_qwen2_1.5B) | 
+|-------|------------------------------------------------|----------------------------------------------------| +| Split | Greedy Search | Greedy Search | +| aishell-1 dev | - | 0.66 | +| aishell-1 test | 3.62 | 0.68 | +| aishell-2 dev | - | 2.67 | +| aishell-2 test | - | 2.94 | +| aishell-4 test | - | 16.20 | +| alimeeting eval | - | 30.86 | +| alimeeting test | - | 40.50 | +| magicdata dev | - | 2.50 | +| magicdata test | - | 1.70 | +| kespeech-asr dev phase1 | - | 6.22 | +| kespeech-asr dev phase2 | - | 2.18 | +| kespeech-asr test | - | 6.59 | +| WenetSpeech dev | - | 4.59 | +| WenetSpeech test_meeting | - | 6.41 | +| WenetSpeech test_net | - | 6.63 | +| SPEECHIO Avg 001-026 | - | 4.80 | Command for training is: ```bash pip install -r whisper_llm_zh/requirements.txt diff --git a/egs/speechio/ASR/local/normalize_results.py b/egs/speechio/ASR/local/normalize_results.py index 14eb1bb2fb..79d8866178 100755 --- a/egs/speechio/ASR/local/normalize_results.py +++ b/egs/speechio/ASR/local/normalize_results.py @@ -16,12 +16,11 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -This file uses whisper and zipformer decoding results to generate fusion decoding results. -Since whisper model is more likely to make deletion errors and zipformer model is more likely to make substitution and insertion errors, -we trust whisper model when it makes substitution and insertion errors and trust zipformer model when it makes deletion errors. +This file uses the official SpeechIO pipeline to normalize the decoding results. +https://github.com/SpeechColab/Leaderboard/blob/master/utils/textnorm_zh.py Usage: - python whisper_zipformer_fusion.py --model-log-dir ./whisper_decoding_log_dir --output-log-dir ./results_norm + python normalize_results.py --model-log-dir ./whisper_decoding_log_dir --output-log-dir ./results_norm """ import argparse