-
Notifications
You must be signed in to change notification settings - Fork 304
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
1,192 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#!/usr/bin/env bash

# Prepare the GigaSpeech and open-commands datasets for KWS training.
# All outputs are written under "data/"; the directory can be removed
# and this script rerun to regenerate everything.

# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

set -eou pipefail

nj=15
stage=0
stop_stage=100

# Allow --stage/--stop-stage/--nj overrides from the command line.
. shared/parse_options.sh || exit 1

# All files generated by this script are saved in "data".
# You can safely remove "data" and rerun this script to regenerate it.
mkdir -p data

# Print a timestamped message prefixed with the caller's file:line:function.
log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
||
if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
  log "Stage 0: Prepare gigaspeech dataset."
  mkdir -p data/fbank
  if [ ! -e data/fbank/.gigaspeech.done ]; then
    # Reuse the ASR recipe to compute features, then symlink them here.
    pushd ../ASR
    ./prepare.sh --stage 0 --stop-stage 9
    ./prepare.sh --stage 11 --stop-stage 11
    popd
    pushd data/fbank
    # NOTE(review): these paths are resolved relative to data/fbank, so
    # "../ASR" points at data/ASR rather than the sibling ASR recipe
    # directory used by the pushd above — confirm against the on-disk layout.
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_DEV.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_DEV.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_TEST.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_TEST.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_L.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_L.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_M.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_M.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_S.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_S.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_XS.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_XS.lca) .
    ln -svf $(realpath ../ASR/data/fbank/XL_split) .
    ln -svf $(realpath ../ASR/data/fbank/musan_cuts.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/musan_feats) .
    popd
    pushd data
    ln -svf $(realpath ../ASR/data/lang_bpe_500) .
    popd
    # Create the completion marker: it is checked above (and by run.sh)
    # but was never created, so the stage would rerun on every invocation.
    touch data/fbank/.gigaspeech.done
  else
    log "Gigaspeech dataset already exists, skipping."
  fi
fi
|
||
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
  log "Stage 1: Prepare open commands dataset."
  mkdir -p data/fbank
  if [ ! -e data/fbank/.fluent_speech_commands.done ]; then
    # Clone the open-commands recipe into data/ and run its fbank stages.
    pushd data
    git clone https://github.com/pkufool/open-commands.git
    ln -svf $(realpath ./open-commands/EN/small/commands.txt) commands_small.txt
    ln -svf $(realpath ./open-commands/EN/large/commands.txt) commands_large.txt
    pushd open-commands
    ./script/prepare.sh --stage 3 --stop-stage 3
    ./script/prepare.sh --stage 6 --stop-stage 6
    popd
    popd
    # Symlink the generated cuts/features into data/fbank; "../open-commands"
    # is correct here because the repo was cloned into data/ above.
    pushd data/fbank
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_large.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_large) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_small.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_small) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_valid.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_valid) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_train.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_train) .
    popd
    # Marker so this stage is skipped on subsequent runs.
    touch data/fbank/.fluent_speech_commands.done
  else
    log "Fluent speech commands dataset already exists, skipping."
  fi
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
#!/usr/bin/env bash

# Train, decode, export and finetune a zipformer KWS model.
# Requires prepare.sh to have been run first (see stage 0 check below).

# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

set -eou pipefail

export CUDA_VISIBLE_DEVICES="0,1,2,3"
export PYTHONPATH=../../../:$PYTHONPATH

stage=0
stop_stage=100

# Where to fetch the pre-trained model in stage -1: "github" or "modelscope".
pre_trained_model_host=github

# Allow --stage/--stop-stage/--pre-trained-model-host overrides.
. shared/parse_options.sh || exit 1

# Print a timestamped message prefixed with the caller's file:line:function.
log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
||
|
||
if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
  log "Stage -1: Download a pre-trained model."
  # Fixes vs. original: strings were compared with the integer operator
  # -eq (a runtime error), "[$pre_trained_model_host" lacked the space
  # after "[", and the empty then/elif bodies were a bash syntax error.
  if [ "$pre_trained_model_host" = "github" ]; then
    : # TODO: download the pre-trained model from GitHub.
  elif [ "$pre_trained_model_host" = "modelscope" ]; then
    : # TODO: download the pre-trained model from ModelScope.
  else
    log "Pretrained model host : $pre_trained_model_host not support."
    exit 1
  fi
fi
|
||
|
||
if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
  log "Stage 0: Train a model."
  if [ ! -e data/fbank/.gigaspeech.done ]; then
    log "You need to run the prepare.sh first."
    # exit takes a non-negative status; "exit -1" is non-portable.
    exit 1
  fi

  python ./zipformer/train.py \
    --world-size 4 \
    --exp-dir zipformer/exp \
    --decoder-dim 320 \
    --joiner-dim 320 \
    --num-encoder-layers 1,1,1,1,1,1 \
    --feedforward-dim 192,192,192,192,192,192 \
    --encoder-dim 128,128,128,128,128,128 \
    --encoder-unmasked-dim 128,128,128,128,128,128 \
    --num-epochs 15 \
    --lr-epochs 1.5 \
    --use-fp16 1 \
    --start-epoch 1 \
    --training-subset L \
    --pinyin-type partial_with_tone \
    --causal 1 \
    --lang-dir data/lang_partial_tone \
    --max-duration 1000
fi
|
||
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
  log "Stage 1: Decode the model."
  # No comma in the word list: "small, large" would iterate over the
  # literal word "small," and build a bogus path commands_small,.txt.
  for t in small large; do
    python ./zipformer/decode.py \
      --epoch 15 \
      --avg 2 \
      --exp-dir ./zipformer/exp \
      --lang-dir ./data/lang_partial_tone \
      --pinyin-type partial_with_tone \
      --causal 1 \
      --chunk-size 16 \
      --left-context-frames 64 \
      --decoder-dim 320 \
      --joiner-dim 320 \
      --num-encoder-layers 1,1,1,1,1,1 \
      --feedforward-dim 192,192,192,192,192,192 \
      --encoder-dim 128,128,128,128,128,128 \
      --encoder-unmasked-dim 128,128,128,128,128,128 \
      --test-set $t \
      --keywords-score 1.0 \
      --keywords-threshold 0.35 \
      --keywords-file ./data/commands_${t}.txt \
      --max-duration 3000
  done
fi
|
||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
  log "Stage 2: Export the model."

  # Export a torchscript-style checkpoint for offline use.
  python ./zipformer/export.py \
    --epoch 15 \
    --avg 2 \
    --exp-dir ./zipformer/exp \
    --tokens data/lang_partial_tone/tokens.txt \
    --causal 1 \
    --chunk-size 16 \
    --left-context-frames 64 \
    --decoder-dim 320 \
    --joiner-dim 320 \
    --num-encoder-layers 1,1,1,1,1,1 \
    --feedforward-dim 192,192,192,192,192,192 \
    --encoder-dim 128,128,128,128,128,128 \
    --encoder-unmasked-dim 128,128,128,128,128,128

  # Export an ONNX model for streaming inference.
  python ./zipformer/export_onnx_streaming.py \
    --exp-dir zipformer/exp \
    --tokens data/lang_partial_tone/tokens.txt \
    --epoch 15 \
    --avg 2 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --decoder-dim 320 \
    --joiner-dim 320 \
    --num-encoder-layers 1,1,1,1,1,1 \
    --feedforward-dim 192,192,192,192,192,192 \
    --encoder-dim 128,128,128,128,128,128 \
    --encoder-unmasked-dim 128,128,128,128,128,128 \
    --causal 1
fi
|
||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
  # Log message fixed: this is stage 3, not stage 2.
  log "Stage 3: Finetune the model"

  # The following configuration of lr schedule should work well
  # You may also tune the following parameters to adjust learning rate schedule
  base_lr=0.0005
  lr_epochs=100
  lr_batches=100000

  # We recommend to start from an averaged model
  finetune_ckpt=zipformer/exp/pretrained.pt

  # Note the restored "\" after --exp-dir: without it the command was
  # truncated and "--lang-dir ..." ran as a separate (failing) command.
  ./zipformer/finetune.py \
    --world-size 4 \
    --num-epochs 10 \
    --start-epoch 1 \
    --exp-dir zipformer/exp_finetune \
    --lang-dir ./data/lang_partial_tone \
    --pinyin-type partial_with_tone \
    --use-fp16 1 \
    --decoder-dim 320 \
    --joiner-dim 320 \
    --num-encoder-layers 1,1,1,1,1,1 \
    --feedforward-dim 192,192,192,192,192,192 \
    --encoder-dim 128,128,128,128,128,128 \
    --encoder-unmasked-dim 128,128,128,128,128,128 \
    --causal 1 \
    --base-lr $base_lr \
    --lr-epochs $lr_epochs \
    --lr-batches $lr_batches \
    --finetune-ckpt $finetune_ckpt \
    --max-duration 1500
fi
|
||
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
  # Log message fixed: this is stage 4, not stage 1.
  log "Stage 4: Decode the finetuned model."
  # No comma in the word list (see stage 1): "small," would be iterated
  # literally and produce a bogus commands_small,.txt path.
  for t in small large; do
    python ./zipformer/decode.py \
      --epoch 15 \
      --avg 2 \
      --exp-dir ./zipformer/exp_finetune \
      --lang-dir ./data/lang_partial_tone \
      --pinyin-type partial_with_tone \
      --causal 1 \
      --chunk-size 16 \
      --left-context-frames 64 \
      --decoder-dim 320 \
      --joiner-dim 320 \
      --num-encoder-layers 1,1,1,1,1,1 \
      --feedforward-dim 192,192,192,192,192,192 \
      --encoder-dim 128,128,128,128,128,128 \
      --encoder-unmasked-dim 128,128,128,128,128,128 \
      --test-set $t \
      --keywords-score 1.0 \
      --keywords-threshold 0.35 \
      --keywords-file ./data/commands_${t}.txt \
      --max-duration 3000
  done
fi
|
||
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
  # Log message fixed: this is stage 5, not stage 2.
  log "Stage 5: Export the finetuned model."

  python ./zipformer/export_onnx_streaming.py \
    --exp-dir zipformer/exp_finetune \
    --tokens data/lang_partial_tone/tokens.txt \
    --epoch 15 \
    --avg 2 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --decoder-dim 320 \
    --joiner-dim 320 \
    --num-encoder-layers 1,1,1,1,1,1 \
    --feedforward-dim 192,192,192,192,192,192 \
    --encoder-dim 128,128,128,128,128,128 \
    --encoder-unmasked-dim 128,128,128,128,128,128 \
    --causal 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../icefall/shared |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../librispeech/ASR/zipformer/export-onnx-streaming.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../../librispeech/ASR/zipformer/export.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
#!/usr/bin/env bash

# Prepare the GigaSpeech and open-commands datasets for KWS training.
# All outputs are written under "data/"; the directory can be removed
# and this script rerun to regenerate everything.

# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

set -eou pipefail

nj=15
stage=0
stop_stage=100

# Allow --stage/--stop-stage/--nj overrides from the command line.
. shared/parse_options.sh || exit 1

# All files generated by this script are saved in "data".
# You can safely remove "data" and rerun this script to regenerate it.
mkdir -p data

# Print a timestamped message prefixed with the caller's file:line:function.
log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
||
if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
  log "Stage 0: Prepare gigaspeech dataset."
  mkdir -p data/fbank
  if [ ! -e data/fbank/.gigaspeech.done ]; then
    # Reuse the ASR recipe to compute features, then symlink them here.
    pushd ../ASR
    ./prepare.sh --stage 0 --stop-stage 9
    ./prepare.sh --stage 11 --stop-stage 11
    popd
    pushd data/fbank
    # NOTE(review): these paths are resolved relative to data/fbank, so
    # "../ASR" points at data/ASR rather than the sibling ASR recipe
    # directory used by the pushd above — confirm against the on-disk layout.
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_DEV.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_DEV.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_TEST.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_TEST.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_L.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_L.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_M.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_M.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_S.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_S.lca) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_cuts_XS.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/gigaspeech_feats_XS.lca) .
    ln -svf $(realpath ../ASR/data/fbank/XL_split) .
    ln -svf $(realpath ../ASR/data/fbank/musan_cuts.jsonl.gz) .
    ln -svf $(realpath ../ASR/data/fbank/musan_feats) .
    popd
    pushd data
    ln -svf $(realpath ../ASR/data/lang_bpe_500) .
    popd
    # Create the completion marker: it is checked above (and by run.sh)
    # but was never created, so the stage would rerun on every invocation.
    touch data/fbank/.gigaspeech.done
  else
    log "Gigaspeech dataset already exists, skipping."
  fi
fi
|
||
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
  log "Stage 1: Prepare open commands dataset."
  mkdir -p data/fbank
  if [ ! -e data/fbank/.fluent_speech_commands.done ]; then
    # Clone the open-commands recipe into data/ and run its fbank stages.
    pushd data
    git clone https://github.com/pkufool/open-commands.git
    ln -svf $(realpath ./open-commands/EN/small/commands.txt) commands_small.txt
    ln -svf $(realpath ./open-commands/EN/large/commands.txt) commands_large.txt
    pushd open-commands
    ./script/prepare.sh --stage 3 --stop-stage 3
    ./script/prepare.sh --stage 6 --stop-stage 6
    popd
    popd
    # Symlink the generated cuts/features into data/fbank; "../open-commands"
    # is correct here because the repo was cloned into data/ above.
    pushd data/fbank
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_large.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_large) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_small.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_small) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_valid.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_valid) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_cuts_train.jsonl.gz) .
    ln -svf $(realpath ../open-commands/data/fbank/fluent_speech_commands_feats_train) .
    popd
    # Marker so this stage is skipped on subsequent runs.
    touch data/fbank/.fluent_speech_commands.done
  else
    log "Fluent speech commands dataset already exists, skipping."
  fi
fi
Oops, something went wrong.