diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py index 5eb301c89..51d5bfb7f 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py @@ -795,7 +795,7 @@ class Decorrelate(torch.nn.Module): def __init__(self, num_channels: int, scale: float = 0.1, - apply_prob_decay: int = 1000, + apply_prob_decay: int = 2000, eps: float = 1.0e-05, beta: float = 0.95, channel_dim: int = -1): diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py index b1cf5cfe2..0a85841fe 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py @@ -199,7 +199,7 @@ class ConformerEncoderLayer(nn.Module): ) self.dropout = nn.Dropout(dropout) - self.decorrelate = Decorrelate(d_model, scale=0.04) + self.decorrelate = Decorrelate(d_model, scale=0.02) def forward(