diff --git a/egs/multi_zh-hans/ASR/zipformer/pretrained.py b/egs/multi_zh-hans/ASR/zipformer/pretrained.py
index c15db11f7..1b53465c0 100755
--- a/egs/multi_zh-hans/ASR/zipformer/pretrained.py
+++ b/egs/multi_zh-hans/ASR/zipformer/pretrained.py
@@ -328,9 +328,14 @@ def main():
     logging.info(msg)
 
     def token_ids_to_words(token_ids: List[int]) -> str:
-        text = ""
+        byte_list = []
         for i in token_ids:
-            text += token_table[i]
+            token = token_table[i]
+            if token.startswith("<0x") and token.endswith(">"):
+                byte_list.append(int(token[3:-1], 16))
+            else:
+                byte_list += list(token.encode("utf-8"))
+        text = bytes(byte_list).decode("utf-8", errors='ignore')
         return text.replace("▁", " ").strip()
 
     if params.method == "fast_beam_search":