From e49fe2e7c439a1e4145035af7f2f8b4df935ba66 Mon Sep 17 00:00:00 2001
From: drawfish
Date: Mon, 22 May 2023 16:46:15 +0800
Subject: [PATCH] Fixed the issue of errors in fully silent sentences during evaluation.

---
 .../pruned_transducer_stateless7_ctc_bs/frame_reducer.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
index 0841f7cf1..9798fd4fe 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
@@ -74,9 +74,13 @@ class FrameReducer(nn.Module):
         padding_mask = make_pad_mask(x_lens)
         non_blank_mask = (ctc_output[:, :, blank_id] < math.log(0.9)) * (~padding_mask)
 
-        if y_lens is not None:
+        if y_lens is not None or self.training == False:
             # Limit the maximum number of reduced frames
-            limit_lens = T - y_lens
+            if y_lens is not None:
+                limit_lens = T - y_lens
+            else:
+                # In eval mode, ensure audio that is completely silent does not make any errors
+                limit_lens = torch.ones_like(x_lens)
             max_limit_len = limit_lens.max().int()
             fake_limit_indexes = torch.topk(
                 ctc_output[:, :, blank_id], max_limit_len