Mirror of https://github.com/k2-fsa/icefall.git, synced 2025-08-09 18:12:19 +00:00
Remove grad clipping as there are no RNNs in the network.
parent 14c93add50
commit 65212ee004
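For context, the removed call is PyTorch's torch.nn.utils.clip_grad_norm_ with max_norm=5.0 and norm_type=2.0. Clipping the global gradient norm is mainly a safeguard against exploding gradients in recurrent networks, which is the rationale the commit message gives for dropping it. The sketch below is not part of the commit (clip_grad_norm_sketch is a hypothetical name); it shows roughly what the removed call computes: the global 2-norm over all parameter gradients, followed by a uniform rescaling when that norm exceeds max_norm.

import torch

def clip_grad_norm_sketch(parameters, max_norm: float, norm_type: float = 2.0):
    # Rough, simplified equivalent of torch.nn.utils.clip_grad_norm_:
    # measure the global norm of all gradients, then scale every gradient
    # down by the same factor if that norm exceeds max_norm.
    grads = [p.grad for p in parameters if p.grad is not None]
    if not grads:
        return torch.tensor(0.0)
    total_norm = torch.norm(
        torch.stack([torch.norm(g.detach(), norm_type) for g in grads]),
        norm_type,
    )
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1:
        for g in grads:
            g.detach().mul_(clip_coef)
    return total_norm

With the call removed, the gradients produced by loss.backward() reach optimizer.step() unscaled, which the commit message argues is acceptable because the network contains no RNNs.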
@@ -52,7 +52,6 @@ from lhotse.utils import fix_random_seed
 from model import Transducer
 from torch import Tensor
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.nn.utils import clip_grad_norm_
 from torch.utils.tensorboard import SummaryWriter
 from transformer import Noam
 
@@ -485,7 +484,6 @@ def train_one_epoch(
 
         optimizer.zero_grad()
         loss.backward()
-        clip_grad_norm_(model.parameters(), 5.0, 2.0)
         optimizer.step()
 
         if batch_idx % params.log_interval == 0:
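After this hunk, the per-batch update in train_one_epoch reduces to the standard three-call sequence; this is just a restatement of the surviving context lines above, with the surrounding batch loop omitted:

        optimizer.zero_grad()   # reset gradients from the previous batch
        loss.backward()         # accumulate gradients for this batch
        optimizer.step()        # apply the (now unclipped) update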
@@ -699,7 +697,6 @@ def scan_pessimistic_batches_for_oom(
                 is_training=True,
             )
             loss.backward()
-            clip_grad_norm_(model.parameters(), 5.0, 2.0)
             optimizer.step()
         except RuntimeError as e:
             if "CUDA out of memory" in str(e):