mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-11 06:55:27 +00:00
Lots of changes to how min and max are applied, use 1-norm for min in smooth_cov but not _apply_min_max_with_metric.
This commit is contained in:
parent
6ab4cf615d
commit
9473c7e23d
@ -164,7 +164,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
betas=(0.9, 0.98),
|
betas=(0.9, 0.98),
|
||||||
size_lr_scale=0.1,
|
size_lr_scale=0.1,
|
||||||
cov_min=(0.025, 0.0025, 0.02, 0.0001),
|
cov_min=(0.025, 0.0025, 0.02, 0.0001),
|
||||||
cov_max=(10.0, 10.0, 5.0, 20.0),
|
cov_max=(5.0, 20.0, 5.0, 40.0),
|
||||||
cov_pow=(1.0, 1.0, 1.0, 1.0),
|
cov_pow=(1.0, 1.0, 1.0, 1.0),
|
||||||
param_rms_smooth0=0.4,
|
param_rms_smooth0=0.4,
|
||||||
param_rms_smooth1=0.2,
|
param_rms_smooth1=0.2,
|
||||||
@ -761,10 +761,14 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
# we don't need to multiply `smooth` by anything, because at this point, P_prime should have
|
# we don't need to multiply `smooth` by anything, because at this point, P_prime should have
|
||||||
# diagonal elements close to 1.
|
# diagonal elements close to 1.
|
||||||
|
|
||||||
P = P.clone()
|
P = self._smooth_cov(P,
|
||||||
P_diag = _diag(P)
|
max(smooth, group["cov_min"][0]),
|
||||||
P_diag_mean = _mean(P_diag, exclude_dims=[0], keepdim=True)
|
group["cov_max"][0],
|
||||||
P_diag += smooth * P_diag_mean
|
group["cov_pow"][0])
|
||||||
|
#P = P.clone()
|
||||||
|
#P_diag = _diag(P)
|
||||||
|
#P_diag_mean = _mean(P_diag, exclude_dims=[0], keepdim=True)
|
||||||
|
#P_diag += smooth * P_diag_mean
|
||||||
|
|
||||||
#G = G.clone()
|
#G = G.clone()
|
||||||
#G_diag = _diag(G) # aliased
|
#G_diag = _diag(G) # aliased
|
||||||
@ -818,9 +822,11 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
eps = 1.0e-20
|
eps = 1.0e-20
|
||||||
if power != 1.0:
|
if power != 1.0:
|
||||||
U, S, _ = _svd(X)
|
U, S, _ = _svd(X)
|
||||||
|
def mean(Y):
|
||||||
|
return _mean(Y, exclude_dims=[0], keepdim=True)
|
||||||
def rms(Y):
|
def rms(Y):
|
||||||
return _mean(Y**2, exclude_dims=[0], keepdim=True).sqrt()
|
return _mean(Y**2, exclude_dims=[0], keepdim=True).sqrt()
|
||||||
S = S + min_eig * rms(S) + eps
|
S = S + min_eig * mean(S) + eps
|
||||||
S = S / rms(S)
|
S = S / rms(S)
|
||||||
S = 1. / (1./S + 1./max_eig)
|
S = 1. / (1./S + 1./max_eig)
|
||||||
S = S ** power
|
S = S ** power
|
||||||
@ -833,7 +839,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
# rms of eigenvalues, or spectral 2-norm.
|
# rms of eigenvalues, or spectral 2-norm.
|
||||||
return (_sum(Y**2, exclude_dims=[0], keepdim=True) / size).sqrt()
|
return (_sum(Y**2, exclude_dims=[0], keepdim=True) / size).sqrt()
|
||||||
diag = _diag(X).unsqueeze(-1) # Aliased with X
|
diag = _diag(X).unsqueeze(-1) # Aliased with X
|
||||||
diag += (rms_eig(X) * min_eig + eps)
|
diag += (_mean(diag, exclude_dims=[0], keepdim=True) * min_eig + eps)
|
||||||
X /= rms_eig(X)
|
X /= rms_eig(X)
|
||||||
|
|
||||||
# eig_ceil is the maximum possible eigenvalue that X could possibly
|
# eig_ceil is the maximum possible eigenvalue that X could possibly
|
||||||
@ -892,6 +898,11 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
# size of the block-diagonal matrix..
|
# size of the block-diagonal matrix..
|
||||||
size = X.shape[1] * X.shape[3]
|
size = X.shape[1] * X.shape[3]
|
||||||
# mean eig of M^{0.5} X M^{0.5} ...
|
# mean eig of M^{0.5} X M^{0.5} ...
|
||||||
|
|
||||||
|
def mean_eig(Y):
|
||||||
|
# mean of the diagonal elements, i.e. the mean of the eigenvalues (not the rms).
|
||||||
|
return _mean(_diag(Y), exclude_dims=[0], keepdim=True).unsqueeze(-1)
|
||||||
|
|
||||||
def rms_eig(Y):
|
def rms_eig(Y):
|
||||||
# rms of eigenvalues, or spectral 2-norm.
|
# rms of eigenvalues, or spectral 2-norm.
|
||||||
return (_sum(Y**2, exclude_dims=[0], keepdim=True) / size).sqrt()
|
return (_sum(Y**2, exclude_dims=[0], keepdim=True) / size).sqrt()
|
||||||
@ -903,6 +914,8 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
|
|||||||
X = X * (1.0-min_eig) + min_eig * M.inverse()
|
X = X * (1.0-min_eig) + min_eig * M.inverse()
|
||||||
X = 0.5 * (X + X.transpose(-2, -1)) # make sure exactly symmetric.
|
X = 0.5 * (X + X.transpose(-2, -1)) # make sure exactly symmetric.
|
||||||
|
|
||||||
|
X /= rms_eig(torch.matmul(X, M))
|
||||||
|
|
||||||
# eig_ceil is the maximum possible eigenvalue that X could possibly
|
# eig_ceil is the maximum possible eigenvalue that X could possibly
|
||||||
# have at this time, equal to sqrt(num_blocks * block_size).
|
# have at this time, equal to sqrt(num_blocks * block_size).
|
||||||
eig_ceil = size ** 0.5
|
eig_ceil = size ** 0.5
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user