Remove grad clipping as there are no RNNs in the network.

Fangjun Kuang 2021-12-28 10:31:20 +08:00
parent 14c93add50
commit 65212ee004


@@ -52,7 +52,6 @@ from lhotse.utils import fix_random_seed
 from model import Transducer
 from torch import Tensor
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.nn.utils import clip_grad_norm_
 from torch.utils.tensorboard import SummaryWriter
 from transformer import Noam
@@ -485,7 +484,6 @@ def train_one_epoch(
         optimizer.zero_grad()
         loss.backward()
-        clip_grad_norm_(model.parameters(), 5.0, 2.0)
         optimizer.step()
         if batch_idx % params.log_interval == 0:
@@ -699,7 +697,6 @@ def scan_pessimistic_batches_for_oom(
                 is_training=True,
             )
             loss.backward()
-            clip_grad_norm_(model.parameters(), 5.0, 2.0)
             optimizer.step()
         except RuntimeError as e:
             if "CUDA out of memory" in str(e):