diff --git a/egs/aishell/ASR/prepare.sh b/egs/aishell/ASR/prepare.sh
index 9de060e73d..d5dbe5726d 100755
--- a/egs/aishell/ASR/prepare.sh
+++ b/egs/aishell/ASR/prepare.sh
@@ -204,10 +204,6 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
   if [ ! -f $lang_char_dir/L_disambig.pt ]; then
     ./local/prepare_char.py --lang-dir $lang_char_dir
   fi
-
-  if [ ! -f $lang_char_dir/HLG.fst ]; then
-    ./local/prepare_lang_fst.py --lang-dir $lang_phone_dir --ngram-G ./data/lm/G_3_gram.fst.txt
-  fi
 fi
 
 if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
@@ -262,6 +258,16 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
       --max-order=3 \
       data/lm/3-gram.unpruned.arpa > data/lm/G_3_gram_char.fst.txt
   fi
+
+  # Run prepare_lang_fst.py for the phone lang dir. The guard tests the same
+  # directory that is passed as --lang-dir (presumably where the script writes
+  # HLG.fst -- confirm against prepare_lang_fst.py), not $lang_char_dir.
+  lang_phone_dir=data/lang_phone
+  if [ ! -f $lang_phone_dir/HLG.fst ]; then
+    ./local/prepare_lang_fst.py \
+      --lang-dir $lang_phone_dir \
+      --ngram-G ./data/lm/G_3_gram.fst.txt
+  fi
 fi
 
 if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then
diff --git a/egs/librispeech/ASR/prepare.sh b/egs/librispeech/ASR/prepare.sh
index 93d010ea89..739608572b 100755
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@@ -242,10 +242,6 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
         $lang_dir/L_disambig.pt \
         $lang_dir/L_disambig.fst
     fi
-
-    if [ ! -f $lang_dir/HL.fst ]; then
-      ./local/prepare_lang_fst.py --lang-dir $lang_dir --ngram-G ./data/lm/G_3_gram.fst.txt
-    fi
   done
 fi
 
@@ -303,6 +299,18 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
       --max-order=4 \
       $dl_dir/lm/4-gram.arpa > data/lm/G_4_gram.fst.txt
   fi
+
+  # Moved here from stage 6: run prepare_lang_fst.py once per BPE lang dir,
+  # skipping any directory that already has HL.fst.
+  for vocab_size in ${vocab_sizes[@]}; do
+    lang_dir=data/lang_bpe_${vocab_size}
+
+    if [ ! -f $lang_dir/HL.fst ]; then
+      ./local/prepare_lang_fst.py \
+        --lang-dir $lang_dir \
+        --ngram-G ./data/lm/G_3_gram.fst.txt
+    fi
+  done
 fi
 
 if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then