diff --git a/egs/speech_llm/ASR_LLM/whisper_llm_zh/ds_config_zero1.json b/egs/speech_llm/ASR_LLM/whisper_llm_zh/ds_config_zero1.json index 730937a21..29e710e3c 100644 --- a/egs/speech_llm/ASR_LLM/whisper_llm_zh/ds_config_zero1.json +++ b/egs/speech_llm/ASR_LLM/whisper_llm_zh/ds_config_zero1.json @@ -5,7 +5,7 @@ "loss_scale_window": 100, "initial_scale_power": 16, "hysteresis": 2, - "min_loss_scale": 0.01 + "min_loss_scale": 1 }, "zero_optimization": { "stage": 1, diff --git a/egs/speech_llm/ASR_LLM/zipformer_llm_zh/model.py b/egs/speech_llm/ASR_LLM/zipformer_llm_zh/model.py index d585ec871..b7ad888cd 100644 --- a/egs/speech_llm/ASR_LLM/zipformer_llm_zh/model.py +++ b/egs/speech_llm/ASR_LLM/zipformer_llm_zh/model.py @@ -64,7 +64,6 @@ class SPEECH_LLM(nn.Module): self, encoder_embed: nn.Module, encoder: EncoderInterface, - ctc_output: nn.Module, llm: nn.Module, encoder_projector: nn.Module, ):