From 135be1e19cd7ecbe29454153601f94a878ebd6b4 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Wed, 8 Jun 2022 00:42:04 +0800 Subject: [PATCH] Change dropout_rate from 0.2 to 0.1; fix logging statement; fix assignment to rand_scales, nonrand_scales to use [:] --- egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py | 6 +++--- .../ASR/pruned_transducer_stateless5/conformer.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py index 8ff009a3f..cd3bc07c6 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py @@ -786,7 +786,7 @@ class Decorrelate(torch.nn.Module): U, S, _ = norm_cov.svd() if random.random() < 0.1: - print("Decorrelate: max,min eig of normalized cov is: {S.max().item():.2e},{S.min().item():.2e}") + logging.info(f"Decorrelate: max,min eig of normalized cov is: {S.max().item():.2e},{S.min().item():.2e}") # row indexes of U correspond to channels, column indexes correspond to # singular values: cov = U * diag(S) * U.t() where * is matmul. @@ -817,8 +817,8 @@ class Decorrelate(torch.nn.Module): # rand_proportion is viewed as representing a proportion of the covariance, since # the random and nonrandom components will not be correlated. - self.rand_scales = rand_proportion.sqrt() - self.nonrand_scales = (1.0 - rand_proportion).sqrt() + self.rand_scales[:] = rand_proportion.sqrt() + self.nonrand_scales[:] = (1.0 - rand_proportion).sqrt() if True: diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py index f7fd6ce61..138950d55 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/conformer.py @@ -199,7 +199,7 @@ class ConformerEncoderLayer(nn.Module): ) self.dropout = torch.nn.Dropout(dropout) - self.decorrelate = Decorrelate(d_model, apply_prob=0.25, dropout_rate=0.2) + self.decorrelate = Decorrelate(d_model, apply_prob=0.25, dropout_rate=0.1) def forward(