From 02eb7af82474a4801addf8c3a36a1b150fa851c7 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Thu, 6 Oct 2022 13:01:36 +0800
Subject: [PATCH] Don't always apply the frame mask

---
 .../ASR/pruned_transducer_stateless7/conformer.py         | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index 9dfd682a5..f47183e2b 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -330,6 +330,14 @@ class ConformerEncoder(nn.Module):
             # frame_mask is 0 with probability `feature_mask_dropout_prob`
             # frame_mask shape: (S, N, 1)
             frame_mask = (torch.rand_like(src[...,:1]) > feature_mask_dropout_prob).to(src.dtype)
+
+            # For 10% of sequences, make the frame mask all ones, i.e. drop no frames, so the
+            # model sometimes "sees" the same kind of un-perturbed sequences as at test time.
+            # NOTE(review): the draw below has shape (S, 1, 1), i.e. per frame index -- confirm.
+            frame_mask = torch.logical_or(frame_mask,
+                                          torch.rand_like(src[:,:1,:1]) < 0.1)
+
+
             feature_mask[..., feature_unmasked_dim:] *= frame_mask