Fix wenetspeech tokenizer

This commit is contained in:
pkufool 2023-05-16 18:37:21 +08:00
parent 949e49eec8
commit d7daa3b718

View File

@@ -92,7 +92,7 @@ When training with the L subset, the streaming usage:
--causal-convolution 1 \
--decode-chunk-size 16 \
--left-context 64
(4) modified beam search with RNNLM shallow fusion
./pruned_transducer_stateless5/decode.py \
--epoch 35 \
@@ -853,7 +853,12 @@ def main():
    if os.path.exists(params.context_file):
        contexts = []
        for line in open(params.context_file).readlines():
-           contexts.append(graph_compiler.texts_to_ids(line.strip()))
+           context_list = graph_compiler.texts_to_ids(line.strip())
tmp = []
for context in context_list:
for x in context:
tmp.append(x)
contexts.append(tmp)
        context_graph = ContextGraph(params.context_score)
        context_graph.build(contexts)
    else: