diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index acb669497..d1333b876 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -29,7 +29,6 @@ from scaling import (
     DoubleSwish,
     ScaledConv1d,
     ScaledLinear,  # not as in other dirs.. just scales down initial parameter values.
-    LearnedScale,
 )
 from torch import Tensor, nn
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
index 498364794..8432e4a47 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
@@ -326,30 +326,6 @@ def ScaledConv1d(*args,
     return ans
 
-
-class LearnedScale(torch.nn.Module):
-    """
-    Module that learns a scale dependent on some kind of mask that is typically going to be 0 or 1
-    in training.  The scale will be 1.0 if the mask is 1.0, but may be a different (learned) value
-    if the mask value is not 1.0.
-
-    The idea is that if we have some kind of feature mask that would always be 1.0 in
-    test mode but might sometimes be 0.0 in training mode, we might want the multiply
-    the remaining features by a value dependent on this mask.
-    """
-    def __init__(self):
-        super(LearnedScale, self).__init__()
-        self.alpha = nn.Parameter(torch.tensor(0.0))
-
-    def forward(self,
-                x: Tensor,
-                mask: Tensor):
-        """
-        Mask should either be a number (probably 1.0) or a tensors that broadcasts with x.
-        """
-        if self.training and mask is 1.0:
-            return x
-        return x * (1.0 + self.alpha * (1.0 - mask))
 
 class ActivationBalancer(torch.nn.Module):
     """