Don't skip penalize_abs_values_gt due to memory cutoff; remove grad_scale=0.1

Daniel Povey 2023-05-29 16:29:12 +08:00
parent 7fdd125ba9
commit cbd59b9c68
2 changed files with 2 additions and 2 deletions


@@ -892,7 +892,6 @@ class LearnedDownsamplingModule(nn.Module):
             max_positive=0.6,
             min_abs=1.0,
             max_abs=4.0,
-            grad_scale=0.1,
             prob=ScheduledFloat((0.0, 1.0), (8000.0, 0.25), default=0.0))
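
For context, the prob argument kept above is a schedule rather than a constant: ScheduledFloat((0.0, 1.0), (8000.0, 0.25), default=0.0) is read here as a piecewise-linear function of the global batch count, starting at 1.0 and decaying to 0.25 by batch 8000 (default=0.0 presumably applies when no batch count is set). A minimal sketch under that assumption; scheduled_prob is a hypothetical helper, not the icefall API:

# Hypothetical stand-in for how a ScheduledFloat-style value evaluates;
# not the icefall implementation, just piecewise-linear interpolation
# between the breakpoints (batch, value) = (0.0, 1.0) and (8000.0, 0.25).
def scheduled_prob(batch_count: float,
                   points=((0.0, 1.0), (8000.0, 0.25))) -> float:
    (x0, y0), (x1, y1) = points
    if batch_count <= x0:
        return y0
    if batch_count >= x1:
        return y1
    # linear interpolation between the two breakpoints
    return y0 + (y1 - y0) * (batch_count - x0) / (x1 - x0)

assert scheduled_prob(0.0) == 1.0        # full penalty probability at the start
assert scheduled_prob(4000.0) == 0.625   # halfway through the ramp
assert scheduled_prob(20000.0) == 0.25   # floor after batch 8000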


@@ -897,8 +897,9 @@ class AbsValuePenalizer(nn.Module):
     def forward(self, x: Tensor) -> Tensor:
         if (torch.jit.is_scripting() or not x.requires_grad or
-            (x.is_cuda and self.mem_cutoff(torch.cuda.memory_allocated())) or not self.training
+            not self.training
             or random.random() > self.prob):
+            # or (x.is_cuda and self.mem_cutoff(torch.cuda.memory_allocated()))
             return _no_op(x)  # the _no_op op is to make our diagnostics code work.
         x = penalize_abs_values_gt(x,
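
In effect, the penalty is now skipped only when scripting, when x carries no gradient, when the module is in eval mode, or randomly with probability 1 - prob; a high CUDA memory reading is no longer a reason to skip it. Below is a minimal sketch of that gating, assuming penalize_abs_values_gt returns x unchanged in the forward pass and only injects a gradient penalty for elements whose absolute value exceeds a limit; _PenalizeAbsGt and TinyAbsValuePenalizer are hypothetical stand-ins, not the icefall implementation.

import random

import torch
from torch import Tensor, nn


class _PenalizeAbsGt(torch.autograd.Function):
    # Hypothetical stand-in for penalize_abs_values_gt: identity in the
    # forward pass, adds a fixed penalty gradient where |x| > limit.
    @staticmethod
    def forward(ctx, x: Tensor, limit: float, penalty: float) -> Tensor:
        ctx.save_for_backward(x)
        ctx.limit = limit
        ctx.penalty = penalty
        return x.view_as(x)  # value unchanged; new node for autograd

    @staticmethod
    def backward(ctx, grad_out: Tensor):
        (x,) = ctx.saved_tensors
        over = (x.abs() > ctx.limit).to(grad_out.dtype)
        # push over-limit elements back toward the limit; no grads for
        # the limit/penalty arguments
        return grad_out + ctx.penalty * over * x.sign(), None, None


class TinyAbsValuePenalizer(nn.Module):
    # Sketch of the gating after this commit: no memory-cutoff check,
    # only scripting / requires_grad / training mode / random prob.
    def __init__(self, limit: float = 10.0, penalty: float = 1.0e-04,
                 prob: float = 0.25):
        super().__init__()
        self.limit = limit
        self.penalty = penalty
        self.prob = prob

    def forward(self, x: Tensor) -> Tensor:
        if (torch.jit.is_scripting() or not x.requires_grad
                or not self.training
                or random.random() > self.prob):
            return x  # skip the penalty; the forward value is never changed
        return _PenalizeAbsGt.apply(x, self.limit, self.penalty)

With the memory check gone, how often the penalty actually fires depends only on training mode and prob, which (per the other hunk) can itself be a scheduled value.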