mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
Fix decoding byte bpes tokens to words. (#1966)
This commit is contained in:
parent
762f965cf7
commit
3587c4b3b7
@@ -328,9 +328,14 @@ def main():
|
|||||||
logging.info(msg)
|
logging.info(msg)
|
||||||
|
|
||||||
def token_ids_to_words(token_ids: List[int]) -> str:
|
def token_ids_to_words(token_ids: List[int]) -> str:
|
||||||
text = ""
|
byte_list = []
|
||||||
for i in token_ids:
|
for i in token_ids:
|
||||||
text += token_table[i]
|
token = token_table[i]
|
||||||
|
if token.startswith("<0x") and token.endswith(">"):
|
||||||
|
byte_list.append(int(token[3:-1], 16))
|
||||||
|
else:
|
||||||
|
byte_list += list(token.encode("utf-8"))
|
||||||
|
text = bytes(byte_list).decode("utf-8", errors='ignore')
|
||||||
return text.replace("▁", " ").strip()
|
return text.replace("▁", " ").strip()
|
||||||
|
|
||||||
if params.method == "fast_beam_search":
|
if params.method == "fast_beam_search":
|
||||||
|
Loading…
x
Reference in New Issue
Block a user