diff --git a/egs/multi_zh_en/ASR/zipformer/decode.py b/egs/multi_zh_en/ASR/zipformer/decode.py index a717df0622..e21e8f052c 100755 --- a/egs/multi_zh_en/ASR/zipformer/decode.py +++ b/egs/multi_zh_en/ASR/zipformer/decode.py @@ -130,7 +130,6 @@ from icefall.lexicon import Lexicon from icefall.utils import ( AttributeDict, - make_pad_mask, setup_logger, store_transcripts, str2bool, diff --git a/egs/multi_zh_en/ASR/zipformer/pretrained.py b/egs/multi_zh_en/ASR/zipformer/pretrained.py index e2bc6f9caf..676272e1fc 100755 --- a/egs/multi_zh_en/ASR/zipformer/pretrained.py +++ b/egs/multi_zh_en/ASR/zipformer/pretrained.py @@ -115,6 +115,7 @@ import k2 import kaldifeat +import sentencepiece as spm import torch import torchaudio from beam_search import ( @@ -126,7 +127,7 @@ from torch.nn.utils.rnn import pad_sequence from train import add_model_arguments, get_model, get_params -from icefall.utils import make_pad_mask +from icefall import smart_byte_decode def get_parser(): @@ -144,9 +145,9 @@ def get_parser(): ) parser.add_argument( - "--tokens", + "--bpe-model", type=str, - help="""Path to tokens.txt.""", + help="""Path to byte-level bpe model.""", ) parser.add_argument( @@ -263,11 +264,13 @@ def main(): params.update(vars(args)) - token_table = k2.SymbolTable.from_file(params.tokens) + sp = spm.SentencePieceProcessor() + sp.load(params.bpe_model) - params.blank_id = token_table["<blk>"] - params.unk_id = token_table["<unk>"] - params.vocab_size = num_tokens(token_table) + 1 + # <blk> and <unk> are defined in local/train_bpe_model.py + params.blank_id = sp.piece_to_id("<blk>") + params.unk_id = sp.piece_to_id("<unk>") + params.vocab_size = sp.get_piece_size() logging.info(f"{params}") @@ -326,12 +329,6 @@ def main(): msg = f"Using {params.method}" logging.info(msg) - def token_ids_to_words(token_ids: List[int]) -> str: - text = "" - for i in token_ids: - text += token_table[i] - return text.replace("▁", " ").strip() - if params.method == "fast_beam_search": decoding_graph = 
k2.trivial_graph(params.vocab_size - 1, device=device) hyp_tokens = fast_beam_search_one_best( @@ -343,8 +340,8 @@ def token_ids_to_words(token_ids: List[int]) -> str: max_contexts=params.max_contexts, max_states=params.max_states, ) - for hyp in hyp_tokens: - hyps.append(token_ids_to_words(hyp)) + for hyp in sp.decode(hyp_tokens): + hyps.append(smart_byte_decode(hyp).split()) elif params.method == "modified_beam_search": hyp_tokens = modified_beam_search( model=model, @@ -353,16 +350,16 @@ def token_ids_to_words(token_ids: List[int]) -> str: beam=params.beam_size, ) - for hyp in hyp_tokens: - hyps.append(token_ids_to_words(hyp)) + for hyp in sp.decode(hyp_tokens): + hyps.append(smart_byte_decode(hyp).split()) elif params.method == "greedy_search" and params.max_sym_per_frame == 1: hyp_tokens = greedy_search_batch( model=model, encoder_out=encoder_out, encoder_out_lens=encoder_out_lens, ) - for hyp in hyp_tokens: - hyps.append(token_ids_to_words(hyp)) + for hyp in sp.decode(hyp_tokens): + hyps.append(smart_byte_decode(hyp).split()) else: raise ValueError(f"Unsupported method: {params.method}")