mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
commit
c0003483d3
@ -367,7 +367,7 @@ class ActivationBalancer(torch.nn.Module):
|
|||||||
min_positive: the minimum, per channel, of the proportion of the time
|
min_positive: the minimum, per channel, of the proportion of the time
|
||||||
that (x > 0), below which we start to modify the derivatives.
|
that (x > 0), below which we start to modify the derivatives.
|
||||||
max_positive: the maximum, per channel, of the proportion of the time
|
max_positive: the maximum, per channel, of the proportion of the time
|
||||||
that (x > 0), below which we start to modify the derivatives.
|
that (x > 0), above which we start to modify the derivatives.
|
||||||
max_factor: the maximum factor by which we modify the derivatives for
|
max_factor: the maximum factor by which we modify the derivatives for
|
||||||
either the sign constraint or the magnitude constraint;
|
either the sign constraint or the magnitude constraint;
|
||||||
e.g. with max_factor=0.02, the derivatives would be multiplied by
|
e.g. with max_factor=0.02, the derivatives would be multiplied by
|
||||||
@ -413,7 +413,7 @@ class DoubleSwishFunction(torch.autograd.Function):
|
|||||||
"""
|
"""
|
||||||
double_swish(x) = x * torch.sigmoid(x-1)
|
double_swish(x) = x * torch.sigmoid(x-1)
|
||||||
This is a definition, originally motivated by its close numerical
|
This is a definition, originally motivated by its close numerical
|
||||||
similarity to swish(swish(x), where swish(x) = x * sigmoid(x).
|
similarity to swish(swish(x)), where swish(x) = x * sigmoid(x).
|
||||||
|
|
||||||
Memory-efficient derivative computation:
|
Memory-efficient derivative computation:
|
||||||
double_swish(x) = x * s, where s(x) = torch.sigmoid(x-1)
|
double_swish(x) = x * s, where s(x) = torch.sigmoid(x-1)
|
||||||
|
@ -111,7 +111,7 @@ def get_diagnostics_for_dim(
|
|||||||
options object
|
options object
|
||||||
sizes_same:
|
sizes_same:
|
||||||
True if all the tensor sizes are the same on this dimension
|
True if all the tensor sizes are the same on this dimension
|
||||||
stats_type: either "abs" or "positive" or "eigs" or "value",
|
stats_type: either "abs" or "positive" or "eigs" or "value",
|
||||||
indicates the type of stats we accumulate, abs is mean absolute
|
indicates the type of stats we accumulate, abs is mean absolute
|
||||||
value, "positive" is proportion of positive to nonnegative values,
|
value, "positive" is proportion of positive to nonnegative values,
|
||||||
"eigs" is eigenvalues after doing outer product on this dim, sum
|
"eigs" is eigenvalues after doing outer product on this dim, sum
|
||||||
|
Loading…
x
Reference in New Issue
Block a user