From 4ebccebcc01da5aaed006d9c215b012a0fd2306d Mon Sep 17 00:00:00 2001 From: root Date: Tue, 11 Jun 2024 09:17:31 +0000 Subject: [PATCH] Remove debug logging, return decode hypotheses, and change encoder-projector-ds-rate default from 4 to 1 --- egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py b/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py index 603a6e0af..b51ebcfe3 100755 --- a/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py +++ b/egs/speech_llm/ASR_LLM/whisper_llm_zh/decode.py @@ -134,7 +134,7 @@ def add_model_arguments(parser: argparse.ArgumentParser): parser.add_argument( "--encoder-projector-ds-rate", type=int, - default=4, + default=1, help="Downsample rate for the encoder projector.", ) @@ -290,11 +290,6 @@ def decode_one_batch( {"role": "user", "content": f"{DEFAULT_SPEECH_TOKEN}请转写音频为文字"}, {"role": "assistant", "content": ""}, ]] * len(feature) - # messages = [[ - # {"role": "system", "content": "你是一个能处理音频的助手。"}, - # {"role": "user", "content": f"请转写音频为文字 {DEFAULT_SPEECH_TOKEN}"}, - # {"role": "assistant", "content": ""}, - # ]] * len(feature) input_ids, attention_mask = preprocess( messages, tokenizer, max_len=128 ) @@ -302,13 +297,7 @@ def decode_one_batch( generated_ids = model.decode(feature, input_ids.to(device, dtype=torch.long), attention_mask.to(device)) hyps = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) - hyps = tokenizer.batch_decode(generated_ids, skip_special_tokens=False) - - print(hyps) - texts = batch["supervisions"]["text"] - for i, text in enumerate(texts): - print(f"ref: {text}") - print(f"hyp: {hyps[i]}") + return {"beam-search": hyps}