Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-09-19 05:54:20 +00:00)
Don't skip penalize_abs_values_gt due to memory cutoff; remove grad_scale=0.1
parent 7fdd125ba9
commit cbd59b9c68
@@ -892,7 +892,6 @@ class LearnedDownsamplingModule(nn.Module):
                  max_positive=0.6,
                  min_abs=1.0,
                  max_abs=4.0,
-                 grad_scale=0.1,
                  prob=ScheduledFloat((0.0, 1.0), (8000.0, 0.25), default=0.0))
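This hunk drops grad_scale=0.1 from the argument list, matching the second half of the commit title. The prob value is a ScheduledFloat, icefall's piecewise-linear schedule over the training batch count (defined in scaling.py): here it is 1.0 at batch 0, decays linearly to 0.25 by batch 8000, and falls back to default=0.0 when no batch count is set. A minimal sketch of that scheduling idea (the function name is hypothetical; icefall's real ScheduledFloat is a class with more machinery):

from typing import Optional, Tuple

def scheduled_float(batch_count: Optional[float],
                    *points: Tuple[float, float],
                    default: float = 0.0) -> float:
    # Piecewise-linear interpolation between (batch, value) breakpoints,
    # clamped at both ends; `default` stands in when no batch count is known.
    if batch_count is None:
        return default
    pts = sorted(points)
    if batch_count <= pts[0][0]:
        return pts[0][1]
    if batch_count >= pts[-1][0]:
        return pts[-1][1]
    for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
        if x0 <= batch_count <= x1:
            return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

print(scheduled_float(4000.0, (0.0, 1.0), (8000.0, 0.25)))  # 0.625, halfway down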
@@ -897,8 +897,9 @@ class AbsValuePenalizer(nn.Module):
 
     def forward(self, x: Tensor) -> Tensor:
-        if (torch.jit.is_scripting() or not x.requires_grad or
-                (x.is_cuda and self.mem_cutoff(torch.cuda.memory_allocated()))
+        if (torch.jit.is_scripting() or not x.requires_grad
                 or not self.training
                 or random.random() > self.prob):
+            # or (x.is_cuda and self.mem_cutoff(torch.cuda.memory_allocated()))
             return _no_op(x)  # the _no_op op is to make our diagnostics code work.
 
         x = penalize_abs_values_gt(x,
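Net effect of this hunk: the penalty is no longer skipped when allocated CUDA memory exceeds self.mem_cutoff; that condition survives only as a comment, so only scripting, a no-grad tensor, eval mode, or the random prob gate bypass it. Per its docstring in icefall's scaling.py, penalize_abs_values_gt returns x unmodified in the forward pass and only contributes a penalty gradient in backprop for elements whose absolute value exceeds a limit. A minimal sketch of that idea (the helper name, default values, and the direct gradient-injection mechanism here are illustrative assumptions, not icefall's actual implementation):

import torch

class _AbsValuePenalty(torch.autograd.Function):
    # Identity in the forward pass; the backward pass adds a gradient term
    # that pushes elements with |x| > limit back toward the limit.
    @staticmethod
    def forward(ctx, x: torch.Tensor, limit: float, penalty: float):
        ctx.save_for_backward(x)
        ctx.limit = limit
        ctx.penalty = penalty
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor):
        (x,) = ctx.saved_tensors
        extra = torch.where(x.abs() > ctx.limit,
                            x.sign() * ctx.penalty,
                            torch.zeros_like(x))
        return grad_output + extra, None, None

def penalize_abs_values_gt_sketch(x: torch.Tensor,
                                  limit: float = 4.0,
                                  penalty: float = 1.0e-04) -> torch.Tensor:
    return _AbsValuePenalty.apply(x, limit, penalty)

x = torch.tensor([0.5, 5.0], requires_grad=True)
penalize_abs_values_gt_sketch(x).sum().backward()
print(x.grad)  # ~[1.0000, 1.0001]: extra gradient only on the out-of-range 5.0

Keeping the forward pass an identity means the penalty shapes activations purely through the optimizer, without perturbing the values that downstream layers see.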