Fixed errors triggered by fully silent audio during evaluation.

2025-12-11 06:55:27 +00:00 · 2023-05-22 16:46:15 +08:00 · 2023-05-22 16:46:15 +08:00 · e49fe2e7c4
commit e49fe2e7c4
parent 7c4ff66a3d
1 changed file with 6 additions and 2 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/frame_reducer.py
@@ -74,9 +74,13 @@ class FrameReducer(nn.Module):
        padding_mask = make_pad_mask(x_lens)
        non_blank_mask = (ctc_output[:, :, blank_id] < math.log(0.9)) * (~padding_mask)
-        if y_lens is not None:
+        if y_lens is not None or self.training == False:
            # Limit the maximum number of reduced frames
            if y_lens is not None:
                limit_lens = T - y_lens
            else:
                # In eval mode, ensure audio that is completely silent does not make any errors
                limit_lens = torch.ones_like(x_lens)
            max_limit_len = limit_lens.max().int()
            fake_limit_indexes = torch.topk(
                ctc_output[:, :, blank_id], max_limit_len