From d41b73000eaa4b2c5f7fd2e0137a6c2d75aa2b58 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 31 Mar 2023 13:25:39 +0800 Subject: [PATCH] Modify feature_mask_dropout_prob --- .../ASR/pruned_transducer_stateless7/zipformer.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index 3102bf84d..0a25ca8f4 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -297,21 +297,20 @@ class Zipformer2(EncoderInterface): num_frames_max = (num_frames0 + max_downsampling_factor - 1) # we divide the dropped-out feature dimensions into two equal groups; - # the first group is dropped out with probability 0.05, the second - # with probability approximately (0.2 + 0.05). - feature_mask_dropout_prob1 = 0.05 - feature_mask_dropout_prob2 = 0.2 + # the first group is dropped out with probability 0.1, the second + # with probability approximately twice that. + feature_mask_dropout_prob = 0.1 # frame_mask_max1 shape: (num_frames_max, batch_size, 1) frame_mask_max1 = (torch.rand(num_frames_max, batch_size, 1, device=x.device) > - feature_mask_dropout_prob1).to(x.dtype) + feature_mask_dropout_prob).to(x.dtype) # frame_mask_max2 has additional frames masked, about twice the number. frame_mask_max2 = torch.logical_and(frame_mask_max1, (torch.rand(num_frames_max, batch_size, 1, device=x.device) > - feature_mask_dropout_prob2).to(x.dtype)) + feature_mask_dropout_prob).to(x.dtype)) # dim: (num_frames_max, batch_size, 3)