Merge pull request #313 from glynpu/fix_comments

fix comments
2025-12-11 06:55:27 +00:00 · 2022-04-13 14:03:02 +08:00 · 2022-04-13 14:03:02 +08:00 · c0003483d3
commit c0003483d3
parent 2a854f5607 78418ac37c
2 changed files with 3 additions and 3 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py
@@ -367,7 +367,7 @@ class ActivationBalancer(torch.nn.Module):
           min_positive: the minimum, per channel, of the proportion of the time
               that (x > 0), below which we start to modify the derivatives.
           max_positive: the maximum, per channel, of the proportion of the time
-               that (x > 0), below which we start to modify the derivatives.
+               that (x > 0), above which we start to modify the derivatives.
           max_factor: the maximum factor by which we modify the derivatives for
              either the sign constraint or the magnitude constraint;
              e.g. with max_factor=0.02, the the derivatives would be multiplied by
@@ -413,7 +413,7 @@ class DoubleSwishFunction(torch.autograd.Function):
    """
      double_swish(x) = x * torch.sigmoid(x-1)
    This is a definition, originally motivated by its close numerical
-    similarity to swish(swish(x), where swish(x) =  x * sigmoid(x).
+    similarity to swish(swish(x)), where swish(x) =  x * sigmoid(x).

    Memory-efficient derivative computation:
     double_swish(x) = x * s, where s(x) = torch.sigmoid(x-1)