mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
Fix decoding of byte-level BPE tokens to words. (#1966)
This commit is contained in:
parent
762f965cf7
commit
3587c4b3b7
@ -328,9 +328,14 @@ def main():
|
||||
logging.info(msg)
|
||||
|
||||
def token_ids_to_words(token_ids: List[int]) -> str:
    """Convert a sequence of token IDs into a plain-text string.

    Every ID is looked up in ``token_table`` (provided by the enclosing
    scope).  Tokens written as ``<0xNN>`` are treated as one raw byte with
    hexadecimal value ``NN``; any other token contributes its UTF-8
    encoding.  The collected bytes are decoded as UTF-8 in one pass, so a
    character split over several byte tokens is reassembled correctly.

    Args:
        token_ids: Token IDs to convert, in order.

    Returns:
        The decoded text with every "▁" replaced by a space and leading and
        trailing whitespace removed.  Byte sequences that are not valid
        UTF-8 are dropped silently.
    """
    # NOTE(review): assumes token_table[i] yields a str -- confirm against
    # the symbol table built by the caller.
    buf = bytearray()
    for i in token_ids:
        token = token_table[i]
        if token.startswith("<0x") and token.endswith(">"):
            try:
                byte_value = int(token[3:-1], 16)
            except ValueError:
                # Looks like a byte token but the payload is not hex.
                byte_value = -1
            if 0 <= byte_value <= 0xFF:
                buf.append(byte_value)
                continue
            # Malformed or out-of-range byte token: keep it as literal text
            # instead of aborting the whole decode.
        buf += token.encode("utf-8")

    # Decode once, after all bytes are gathered; decoding per token would
    # break multi-byte characters that span several byte tokens.
    text = buf.decode("utf-8", errors="ignore")
    return text.replace("▁", " ").strip()
|
||||
|
||||
if params.method == "fast_beam_search":
|
||||
|
Loading…
x
Reference in New Issue
Block a user