From 02eb7af82474a4801addf8c3a36a1b150fa851c7 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 6 Oct 2022 13:01:36 +0800 Subject: [PATCH] Don't always apply the frame mask --- .../ASR/pruned_transducer_stateless7/conformer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py index 9dfd682a5..f47183e2b 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py @@ -330,6 +330,14 @@ class ConformerEncoder(nn.Module): # frame_mask is 0 with probability `feature_mask_dropout_prob` # frame_mask shape: (S, N, 1) frame_mask = (torch.rand_like(src[...,:1]) > feature_mask_dropout_prob).to(src.dtype) + + # for 10% of sequences, make the frame mask always-1, i.e. don't drop out any of + # the frames. This is to make sure the model sometimes "sees" the same types of + # un-perturbed sequences that it will see at test time. + frame_mask = torch.logical_or(frame_mask, + torch.rand_like(src[:,:1,:1]) < 0.1) + + feature_mask[..., feature_unmasked_dim:] *= frame_mask