Some configuration changes, trying to tune it so that ctc_loss does not degrade from epoch 1.

Daniel Povey 2021-09-23 19:38:57 +08:00
parent 6fa0f16e0c
commit 6636c05f12


@@ -173,11 +173,11 @@ def get_params() -> AttributeDict:
             "use_double_scores": True,
             "accum_grad": 1,
             "att_scale": 0.5,
-            "reverse_att_scale": 0.2,
+            "reverse_att_scale": 0.25,
             "ctc_scale": 0.3,
-            "delay_scale": 0.1,  # Scale on difference between current and
+            "delay_scale": 2.5,  # Scale on difference between current and
             # delayed version of positive_embed.
-            "delay_minibatches": 200,
+            "delay_minibatches": 300,
             "attention_dim": 512,
             "nhead": 8,
             "num_trunk_encoder_layers": 12,
@@ -460,7 +460,7 @@ def compute_loss(
     delayed_model = get_delayed_model(model, params)
     with torch.random.fork_rng(devices=[device], enabled=True):
         (old_memory, _, _) = delayed_model(feature, supervisions)
-        (_, _, old_positive_embed, _, _) = delayed_model.sample_forward(old_memory)
+        (_, old_softmax, _, _, _) = delayed_model.sample_forward(old_memory)
 
     with torch.set_grad_enabled(is_training):
@@ -472,7 +472,7 @@ def compute_loss(
          negative_embed_shifted) = mmodel.sample_forward(memory)
         if params.cur_epoch > 0 and params.delay_scale > 0.0:
-            delay_loss = compute_distance(old_positive_embed, positive_embed)
+            delay_loss = compute_distance(old_softmax, softmax)
     num_subsampled_frames = memory.shape[0] * memory.shape[1]
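The two hunks above implement the "delay" penalty that delay_scale weights: the current model's output is compared against the output of a delayed copy of the model (a snapshot refreshed every delay_minibatches minibatches), and torch.random.fork_rng is presumably used so that the extra forward pass through the delayed copy consumes its own RNG state (e.g. for dropout) without disturbing the random stream of the main forward pass. get_delayed_model, sample_forward and compute_distance exist in the training script but their bodies are not shown in this diff, so the sketch below is only an assumed illustration of the idea:

import copy
import torch

def get_delayed_model(model: torch.nn.Module, params) -> torch.nn.Module:
    # Assumed behaviour: return a frozen snapshot of the model; the caller
    # would refresh this copy every params.delay_minibatches minibatches.
    delayed = copy.deepcopy(model)
    for p in delayed.parameters():
        p.requires_grad_(False)
    return delayed

def compute_distance(old: torch.Tensor, new: torch.Tensor) -> torch.Tensor:
    # Assumed distance: mean squared difference between the delayed and
    # current outputs; the real compute_distance may differ.
    return ((old - new) ** 2).mean()

With this commit, the penalty is computed on the softmax outputs (old_softmax vs. softmax) rather than on positive_embed, and its weight delay_scale is raised from 0.1 to 2.5, so the pull toward the delayed model is made considerably stronger.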