forked from k2-fsa/icefall
-
Notifications
You must be signed in to change notification settings - Fork 0
/
finetune.sh
executable file
·82 lines (65 loc) · 2.59 KB
/
finetune.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env bash

# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (-o pipefail).
set -euo pipefail

# Work around the segmentation fault reported in
# https://github.com/k2-fsa/icefall/issues/674
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Inclusive range of stages to execute. A stage N runs only when
# stage <= N <= stop_stage.
# NOTE(review): presumably overridable on the command line through
# shared/parse_options.sh, which is sourced further down - confirm.
stage=-1
stop_stage=100
# This is an example script for fine-tuning. Here, we take a model trained
# on WenetSpeech and fine-tune it on Aishell. The model used for fine-tuning is
# pruned_transducer_stateless2 (zipformer). If you want to fine-tune a model
# from another recipe, you can adapt ./pruned_transducer_stateless2/finetune.py
# for that recipe. If you run into any problems, please open an issue at https://github.com/k2-fsa/icefall/issues.
# We assume that you have already prepared the Aishell manifests and features under ./data.
# If you haven't done that, please see https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/prepare.sh.
# Pull in the shared option parser and stop immediately if it cannot be
# sourced or reports a failure.
# NOTE(review): presumably this maps --stage / --stop-stage style arguments
# onto the variables defined above - verify against shared/parse_options.sh.
source shared/parse_options.sh || exit 1
#######################################
# Print a timestamped message tagged with the caller's location.
# Adapted from espnet.
# Arguments: the message; all arguments are joined with "$*" and
#            backslash escapes in them are interpreted (echo -e).
# Outputs:   one line on stdout of the form
#            "YYYY-MM-DD HH:MM:SS (file:line:function) message"
#######################################
log() {
  # Index 1 of BASH_SOURCE / FUNCNAME and index 0 of BASH_LINENO describe
  # the code that called log, not log itself.
  local caller_file=${BASH_SOURCE[1]##*/}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
}
# Stage -1: fetch the pre-trained WenetSpeech model from Hugging Face into
# the current directory. Re-running the stage is safe: an existing checkout
# is kept instead of being cloned again.
if [ "$stage" -le -1 ] && [ "$stop_stage" -ge -1 ]; then
  log "Stage -1: Download Pre-trained model"

  pretrained_repo=icefall_asr_wenetspeech_pruned_transducer_stateless2

  # clone from huggingface
  git lfs install
  if [ -d "${pretrained_repo}/.git" ]; then
    # git clone refuses to write into an existing non-empty directory; under
    # `set -e` that failure would abort a re-run before it ever reaches the
    # training stage, so skip the clone when the checkout is already there.
    log "${pretrained_repo} already exists, skipping git clone (remove it to force a fresh download)"
  else
    git clone "https://huggingface.co/luomingshuang/${pretrained_repo}"
  fi
fi
# Stage 0: fine-tune the downloaded model with
# ./pruned_transducer_stateless2/finetune.py, writing to
# pruned_transducer_stateless2/exp_aishell_finetune.
if [ "$stage" -le 0 ] && [ "$stop_stage" -ge 0 ]; then
  log "Stage 0: Start fine-tuning"

  # The following configuration of lr schedule should work well
  # You may also tune the following parameters to adjust learning rate schedule
  initial_lr=0.0001
  lr_epochs=100
  lr_batches=100000

  # We recommend to start from an averaged model
  finetune_ckpt=icefall_asr_wenetspeech_pruned_transducer_stateless2/exp/pretrained_epoch_10_avg_2.pt
  lang_dir=icefall_asr_wenetspeech_pruned_transducer_stateless2/data/lang_char

  # Fail fast with a clear message when the inputs fetched by stage -1 are
  # missing, instead of surfacing the problem from inside the training job.
  if [ ! -f "$finetune_ckpt" ]; then
    log "Error: checkpoint ${finetune_ckpt} not found; run stage -1 first" >&2
    exit 1
  fi
  if [ ! -d "$lang_dir" ]; then
    log "Error: lang dir ${lang_dir} not found; run stage -1 first" >&2
    exit 1
  fi

  # Two GPUs are exposed and --world-size is 2.
  # NOTE(review): presumably one training process per visible GPU, so the
  # two values should be changed together - confirm against finetune.py.
  export CUDA_VISIBLE_DEVICES="0,1"

  ./pruned_transducer_stateless2/finetune.py \
    --world-size 2 \
    --master-port 18180 \
    --num-epochs 15 \
    --context-size 2 \
    --exp-dir pruned_transducer_stateless2/exp_aishell_finetune \
    --initial-lr "$initial_lr" \
    --lr-epochs "$lr_epochs" \
    --lr-batches "$lr_batches" \
    --lang-dir "$lang_dir" \
    --do-finetune True \
    --finetune-ckpt "$finetune_ckpt" \
    --max-duration 200
fi
# Stage 1: run pruned_transducer_stateless2/decode_aishell.py on the
# fine-tuning experiment directory, once for each decoding method.
if [ "$stage" -le 1 ] && [ "$stop_stage" -ge 1 ]; then
  log "Stage 1: Decoding"

  # Values forwarded to the decoder as --epoch and --avg.
  epoch=4
  avg=4

  # Options that are identical for every decoding method.
  decode_opts=(
    --epoch "$epoch"
    --avg "$avg"
    --context-size 2
    --beam-size 4
    --exp-dir pruned_transducer_stateless2/exp_aishell_finetune
    --max-duration 400
  )

  for method in greedy_search modified_beam_search; do
    python pruned_transducer_stateless2/decode_aishell.py \
      "${decode_opts[@]}" \
      --decoding-method "$method"
  done
fi