mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-19 05:54:20 +00:00
Introduce scalar_max, stop eps getting large or small
This commit is contained in:
parent
2f73434541
commit
b7035844a2
@ -58,6 +58,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
learning the scale on the parameters (we'll keep it >= this size)
|
learning the scale on the parameters (we'll keep it >= this size)
|
||||||
param_max_rms: Maximum root-mean-square value of parameter tensor, for purposes of
|
param_max_rms: Maximum root-mean-square value of parameter tensor, for purposes of
|
||||||
learning the scale on the parameters (we'll keep it <= this size)
|
learning the scale on the parameters (we'll keep it <= this size)
|
||||||
|
scalar_max: Maximum absolute value for scalar parameters
|
||||||
size_update_period: The periodicity, in steps, with which we update the size (scale)
|
size_update_period: The periodicity, in steps, with which we update the size (scale)
|
||||||
of the parameter tensor. This is provided to save a little time.
|
of the parameter tensor. This is provided to save a little time.
|
||||||
lr_update_period: The periodicity, in steps, with which we update the learning-rate matrices.
|
lr_update_period: The periodicity, in steps, with which we update the learning-rate matrices.
|
||||||
@ -75,6 +76,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
eps=1.0e-08,
|
eps=1.0e-08,
|
||||||
param_min_rms=1.0e-05,
|
param_min_rms=1.0e-05,
|
||||||
param_max_rms=2.0,
|
param_max_rms=2.0,
|
||||||
|
scalar_max=2.0,
|
||||||
size_update_period=4,
|
size_update_period=4,
|
||||||
lr_update_period=200,
|
lr_update_period=200,
|
||||||
grad_cov_period=3,
|
grad_cov_period=3,
|
||||||
@ -92,6 +94,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
eps=eps,
|
eps=eps,
|
||||||
param_min_rms=param_min_rms,
|
param_min_rms=param_min_rms,
|
||||||
param_max_rms=param_max_rms,
|
param_max_rms=param_max_rms,
|
||||||
|
scalar_max=scalar_max,
|
||||||
size_update_period=size_update_period,
|
size_update_period=size_update_period,
|
||||||
lr_update_period=lr_update_period,
|
lr_update_period=lr_update_period,
|
||||||
grad_cov_period=grad_cov_period,
|
grad_cov_period=grad_cov_period,
|
||||||
@ -514,9 +517,6 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _step(self,
|
def _step(self,
|
||||||
group: dict,
|
group: dict,
|
||||||
p: Tensor,
|
p: Tensor,
|
||||||
@ -618,7 +618,8 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
|
|
||||||
delta = state["delta"]
|
delta = state["delta"]
|
||||||
delta.add_(grad / denom, alpha=alpha)
|
delta.add_(grad / denom, alpha=alpha)
|
||||||
|
scalar_max = state["scalar_max"]
|
||||||
|
p.clamp_(min=-scalar_max, max=scalar_max)
|
||||||
|
|
||||||
def _project(self,
|
def _project(self,
|
||||||
x: Tensor,
|
x: Tensor,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user