Reduce layer-drop prob after warmup to 1 in 100

This commit is contained in:
Daniel Povey 2022-03-27 00:25:32 +08:00
parent b43468bb67
commit 953aecf5e3

View File

@@ -231,7 +231,7 @@ class ConformerEncoderLayer(nn.Module):
# compensate for the small scale by just producing larger output.
warmup = max(warmup, 0.1)
if self.training:
- warmup = min(warmup, 0.98) # effectively, layer-drop with 1-in-50 prob.
+ warmup = min(warmup, 0.99) # effectively, layer-drop with 1-in-100 prob.
alpha = 1.0 if torch.rand(()).item() <= warmup else 0.1
# macaron style feed forward module