Some configuration changes, trying to tune it so that ctc_loss does not degrade from epoch 1.

Daniel Povey 2021-09-23 19:38:57 +08:00
parent 6fa0f16e0c
commit 6636c05f12


@@ -173,11 +173,11 @@ def get_params() -> AttributeDict:
             "use_double_scores": True,
             "accum_grad": 1,
             "att_scale": 0.5,
-            "reverse_att_scale": 0.2,
+            "reverse_att_scale": 0.25,
             "ctc_scale": 0.3,
-            "delay_scale": 0.1,  # Scale on difference between current and
+            "delay_scale": 2.5,  # Scale on difference between current and
             # delayed version of positive_embed.
-            "delay_minibatches": 200,
+            "delay_minibatches": 300,
             "attention_dim": 512,
             "nhead": 8,
             "num_trunk_encoder_layers": 12,
@@ -460,7 +460,7 @@ def compute_loss(
     delayed_model = get_delayed_model(model, params)
     with torch.random.fork_rng(devices=[device], enabled=True):
         (old_memory, _, _) = delayed_model(feature, supervisions)
-        (_, _, old_positive_embed, _, _) = delayed_model.sample_forward(old_memory)
+        (_, old_softmax, _, _, _) = delayed_model.sample_forward(old_memory)
 
     with torch.set_grad_enabled(is_training):
@@ -472,7 +472,7 @@ def compute_loss(
          negative_embed_shifted) = mmodel.sample_forward(memory)
         if params.cur_epoch > 0 and params.delay_scale > 0.0:
-            delay_loss = compute_distance(old_positive_embed, positive_embed)
+            delay_loss = compute_distance(old_softmax, softmax)
     num_subsampled_frames = memory.shape[0] * memory.shape[1]
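The two hunks above implement the "delay" penalty that delay_scale weights: the current model's output is compared against the output of a delayed copy of the model (a snapshot refreshed every delay_minibatches minibatches), and torch.random.fork_rng is presumably used so that the extra forward pass through the delayed copy consumes its own RNG state (e.g. for dropout) without disturbing the random stream of the main forward pass. get_delayed_model, sample_forward and compute_distance exist in the training script but their bodies are not shown in this diff, so the sketch below is only an assumed illustration of the idea:

import copy
import torch

def get_delayed_model(model: torch.nn.Module, params) -> torch.nn.Module:
    # Assumed behaviour: return a frozen snapshot of the model; the caller
    # would refresh this copy every params.delay_minibatches minibatches.
    delayed = copy.deepcopy(model)
    for p in delayed.parameters():
        p.requires_grad_(False)
    return delayed

def compute_distance(old: torch.Tensor, new: torch.Tensor) -> torch.Tensor:
    # Assumed distance: mean squared difference between the delayed and
    # current outputs; the real compute_distance may differ.
    return ((old - new) ** 2).mean()

With this commit, the penalty is computed on the softmax outputs (old_softmax vs. softmax) rather than on positive_embed, and its weight delay_scale is raised from 0.1 to 2.5, so the pull toward the delayed model is made considerably stronger.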