From 1962fe298b713a673cc4fd99c20e1deab45e2560 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Tue, 15 Mar 2022 14:35:15 +0800
Subject: [PATCH] Add deriv-balancer at output of embedding.

---
 egs/librispeech/ASR/conformer_ctc/subsampling.py  | 3 +++
 egs/librispeech/ASR/transducer_stateless/train.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/conformer_ctc/subsampling.py b/egs/librispeech/ASR/conformer_ctc/subsampling.py
index 7c2b1ec04..35de71e43 100644
--- a/egs/librispeech/ASR/conformer_ctc/subsampling.py
+++ b/egs/librispeech/ASR/conformer_ctc/subsampling.py
@@ -57,6 +57,8 @@ class Conv2dSubsampling(nn.Module):
         )
         self.out = ScaledLinear(odim * (((idim - 1) // 2 - 1) // 2), odim)
         self.out_norm = BasicNorm(odim)
+        # constrain mean of output to be close to zero.
+        self.out_balancer = DerivBalancer(channel_dim=-1, min_positive=0.4, max_positive=0.6)
         self._reset_parameters()
 
     def _reset_parameters(self):
@@ -84,6 +86,7 @@ class Conv2dSubsampling(nn.Module):
         x = self.out(x.transpose(1, 2).contiguous().view(b, t, c * f))
         # Now x is of shape (N, ((T-1)//2 - 1))//2, odim)
         x = self.out_norm(x)
+        x = self.out_balancer(x)
         return x
 
 
diff --git a/egs/librispeech/ASR/transducer_stateless/train.py b/egs/librispeech/ASR/transducer_stateless/train.py
index 6408290b4..488de3ccc 100755
--- a/egs/librispeech/ASR/transducer_stateless/train.py
+++ b/egs/librispeech/ASR/transducer_stateless/train.py
@@ -110,7 +110,7 @@ def get_parser():
     parser.add_argument(
         "--exp-dir",
         type=str,
-        default="transducer_stateless/randcombine1_expscale3_rework2c_maxabs1000_maxp0.95_noexp_convderiv3warmup",
+        default="transducer_stateless/randcombine1_expscale3_rework2c_maxabs1000_maxp0.95_noexp_convderiv3warmup_embed",
         help="""The experiment dir.
         It specifies the directory where all training related
         files, e.g., checkpoints, log, etc, are saved