From e49fe2e7c439a1e4145035af7f2f8b4df935ba66 Mon Sep 17 00:00:00 2001
From: drawfish <duisheng.chen@gmail.com>
Date: Mon, 22 May 2023 16:46:15 +0800
Subject: [PATCH] Fix evaluation errors on completely silent audio.

---
 .../pruned_transducer_stateless7_ctc_bs/frame_reducer.py  | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
index 0841f7cf1..9798fd4fe 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
@@ -74,9 +74,13 @@ class FrameReducer(nn.Module):
         padding_mask = make_pad_mask(x_lens)
         non_blank_mask = (ctc_output[:, :, blank_id] < math.log(0.9)) * (~padding_mask)
 
-        if y_lens is not None:
+        if y_lens is not None or self.training == False:
             # Limit the maximum number of reduced frames
-            limit_lens = T - y_lens
+            if y_lens is not None:
+                limit_lens = T - y_lens
+            else:
+                # In eval mode y_lens is None; use a limit of 1 so that completely silent audio does not cause errors
+                limit_lens = torch.ones_like(x_lens)
             max_limit_len = limit_lens.max().int()
             fake_limit_indexes = torch.topk(
                 ctc_output[:, :, blank_id], max_limit_len