diff --git a/egs/multi_zh_en/ASR/zipformer/decode.py b/egs/multi_zh_en/ASR/zipformer/decode.py index a717df0622..e21e8f052c 100755 --- a/egs/multi_zh_en/ASR/zipformer/decode.py +++ b/egs/multi_zh_en/ASR/zipformer/decode.py @@ -130,7 +130,6 @@ from icefall.lexicon import Lexicon from icefall.utils import ( AttributeDict, - make_pad_mask, setup_logger, store_transcripts, str2bool, diff --git a/egs/multi_zh_en/ASR/zipformer/pretrained.py b/egs/multi_zh_en/ASR/zipformer/pretrained.py index e2bc6f9caf..676272e1fc 100755 --- a/egs/multi_zh_en/ASR/zipformer/pretrained.py +++ b/egs/multi_zh_en/ASR/zipformer/pretrained.py @@ -115,6 +115,7 @@ import k2 import kaldifeat +import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -126,7 +127,7 @@ from torch.nn.utils.rnn import pad_sequence from train import add_model_arguments, get_model, get_params -from icefall.utils import make_pad_mask +from icefall import smart_byte_decode def get_parser(): @@ -144,9 +145,9 @@ def get_parser(): ) parser.add_argument( - "--tokens", + "--bpe-model", type=str, - help="""Path to tokens.txt.""", + help="""Path to byte-level bpe model.""", ) parser.add_argument( @@ -263,11 +264,13 @@ def main(): params.update(vars(args)) - token_table = k2.SymbolTable.from_file(params.tokens) + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) - params.blank_id = token_table["<blk>"] - params.unk_id = token_table["<unk>"] - params.vocab_size = num_tokens(token_table) + 1 + # <blk> and <unk> are defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("<blk>") + params.unk_id = sp.piece_to_id("<unk>") + params.vocab_size = sp.get_piece_size() logging.info(f"{params}") @@ -326,12 +329,6 @@ def main(): msg = f"Using {params.method}" logging.info(msg) - def token_ids_to_words(token_ids: List[int]) -> str: - text = "" - for i in token_ids: - text += token_table[i] - return text.replace("▁", " ").strip() - if params.method == "fast_beam_search": decoding_graph = 
k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -343,8 +340,8 @@ def token_ids_to_words(token_ids: List[int]) -> str: max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in hyp_tokens: - hyps.append(token_ids_to_words(hyp)) + for hyp in sp.decode(hyp_tokens): + hyps.append(smart_byte_decode(hyp).split()) elif params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -353,16 +350,16 @@ def token_ids_to_words(token_ids: List[int]) -> str: beam=params.beam_size, ) - for hyp in hyp_tokens: - hyps.append(token_ids_to_words(hyp)) + for hyp in sp.decode(hyp_tokens): + hyps.append(smart_byte_decode(hyp).split()) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in hyp_tokens: - hyps.append(token_ids_to_words(hyp)) + for hyp in sp.decode(hyp_tokens): + hyps.append(smart_byte_decode(hyp).split()) else: raise ValueError(f"Unsupported method: {params.method}")