From 78418ac37cdcfa1c4d0f54fe77901f74644ff96a Mon Sep 17 00:00:00 2001 From: Guo Liyong Date: Wed, 13 Apr 2022 13:09:24 +0800 Subject: [PATCH] fix comments --- egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py | 4 ++-- icefall/diagnostics.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py index d59aa2160..f89d2963e 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py @@ -367,7 +367,7 @@ class ActivationBalancer(torch.nn.Module): min_positive: the minimum, per channel, of the proportion of the time that (x > 0), below which we start to modify the derivatives. max_positive: the maximum, per channel, of the proportion of the time - that (x > 0), below which we start to modify the derivatives. + that (x > 0), above which we start to modify the derivatives. max_factor: the maximum factor by which we modify the derivatives for either the sign constraint or the magnitude constraint; e.g. with max_factor=0.02, the the derivatives would be multiplied by @@ -413,7 +413,7 @@ class DoubleSwishFunction(torch.autograd.Function): """ double_swish(x) = x * torch.sigmoid(x-1) This is a definition, originally motivated by its close numerical - similarity to swish(swish(x), where swish(x) = x * sigmoid(x). + similarity to swish(swish(x)), where swish(x) = x * sigmoid(x). Memory-efficient derivative computation: double_swish(x) = x * s, where s(x) = torch.sigmoid(x-1) diff --git a/icefall/diagnostics.py b/icefall/diagnostics.py index ce4ac1464..bc8fe3069 100644 --- a/icefall/diagnostics.py +++ b/icefall/diagnostics.py @@ -111,7 +111,7 @@ def get_diagnostics_for_dim( options object sizes_same: True if all the tensor sizes are the same on this dimension - stats_type: either "abs" or "positive" or "eigs" or "value", + stats_type: either "abs" or "positive" or "eigs" or "value", imdictates the type of stats we accumulate, abs is mean absolute value, "positive" is proportion of positive to nonnegative values, "eigs" is eigenvalues after doing outer product on this dim, sum