diff --git a/egs/libritts/CODEC/encodec/encodec.py b/egs/libritts/CODEC/encodec/encodec.py
index f21d494b6..d0bf234ae 100644
--- a/egs/libritts/CODEC/encodec/encodec.py
+++ b/egs/libritts/CODEC/encodec/encodec.py
@@ -148,7 +148,7 @@ class Encodec(nn.Module):
         )

         # calculate losses
-        with autocast(enabled=False):
+        with autocast("cuda", enabled=False):
             gen_stft_adv_loss = self.generator_adversarial_loss(outputs=y_hat)

             if self.multi_period_discriminator is not None:
@@ -272,7 +272,7 @@ class Encodec(nn.Module):
             speech_hat.contiguous().detach(),
         )
         # calculate losses
-        with autocast(enabled=False):
+        with autocast("cuda", enabled=False):
             (
                 disc_stft_real_adv_loss,
                 disc_stft_fake_adv_loss,
diff --git a/egs/libritts/CODEC/encodec/train.py b/egs/libritts/CODEC/encodec/train.py
index 307e20142..31349df43 100755
--- a/egs/libritts/CODEC/encodec/train.py
+++ b/egs/libritts/CODEC/encodec/train.py
@@ -466,7 +466,7 @@ def train_one_epoch(
         loss_info["samples"] = batch_size

         try:
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 d_weight = train_discriminator(
                     params.lambda_adv,
                     params.cur_epoch,
@@ -502,7 +502,7 @@ def train_one_epoch(
             scaler.scale(disc_loss).backward()
             scaler.step(optimizer_d)

-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 g_weight = train_discriminator(
                     params.lambda_adv,
                     params.cur_epoch,
@@ -846,7 +846,7 @@ def scan_pessimistic_batches_for_oom(
         ) = prepare_input(params, batch, device)
         try:
             # for discriminator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 (
                     disc_stft_real_adv_loss,
                     disc_stft_fake_adv_loss,
@@ -876,7 +876,7 @@ def scan_pessimistic_batches_for_oom(
             optimizer_d.zero_grad()
             loss_d.backward()
             # for generator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 (
                     commit_loss,
                     gen_stft_adv_loss,
diff --git a/egs/libritts/TTS/vits/train.py b/egs/libritts/TTS/vits/train.py
index 8d8fde961..6803d6eb2 100755
--- a/egs/libritts/TTS/vits/train.py
+++ b/egs/libritts/TTS/vits/train.py
@@ -456,7 +456,7 @@ def train_one_epoch(
         loss_info["samples"] = batch_size

         try:
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 # forward discriminator
                 loss_d, stats_d = model(
                     text=tokens,
@@ -475,7 +475,7 @@ def train_one_epoch(
             scaler.scale(loss_d).backward()
             scaler.step(optimizer_d)

-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 # forward generator
                 loss_g, stats_g = model(
                     text=tokens,
@@ -748,7 +748,7 @@ def scan_pessimistic_batches_for_oom(
         ) = prepare_input(batch, tokenizer, device, train_speaker_map)
         try:
             # for discriminator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 loss_d, stats_d = model(
                     text=tokens,
                     text_lengths=tokens_lens,
@@ -762,7 +762,7 @@ def scan_pessimistic_batches_for_oom(
             optimizer_d.zero_grad()
             loss_d.backward()
             # for generator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 loss_g, stats_g = model(
                     text=tokens,
                     text_lengths=tokens_lens,
diff --git a/egs/ljspeech/TTS/matcha/train.py b/egs/ljspeech/TTS/matcha/train.py
index 03f90ad0f..13db44da0 100755
--- a/egs/ljspeech/TTS/matcha/train.py
+++ b/egs/ljspeech/TTS/matcha/train.py
@@ -479,7 +479,7 @@ def train_one_epoch(
             tokens_lens,
         ) = prepare_input(batch, tokenizer, device, params)
         try:
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 losses = get_losses(
                     {
                         "x": tokens,
diff --git a/egs/ljspeech/TTS/vits/train.py b/egs/ljspeech/TTS/vits/train.py
index e8dfef60f..e9994319a 100755
--- a/egs/ljspeech/TTS/vits/train.py
+++ b/egs/ljspeech/TTS/vits/train.py
@@ -396,7 +396,7 @@ def train_one_epoch(
         loss_info["samples"] = batch_size

         try:
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 # forward discriminator
                 loss_d, stats_d = model(
                     text=tokens,
@@ -414,7 +414,7 @@ def train_one_epoch(
             scaler.scale(loss_d).backward()
             scaler.step(optimizer_d)

-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 # forward generator
                 loss_g, stats_g = model(
                     text=tokens,
@@ -673,7 +673,7 @@ def scan_pessimistic_batches_for_oom(
         )
         try:
             # for discriminator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 loss_d, stats_d = model(
                     text=tokens,
                     text_lengths=tokens_lens,
@@ -686,7 +686,7 @@ def scan_pessimistic_batches_for_oom(
             optimizer_d.zero_grad()
             loss_d.backward()
             # for generator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 loss_g, stats_g = model(
                     text=tokens,
                     text_lengths=tokens_lens,
diff --git a/egs/ljspeech/TTS/vits/vits.py b/egs/ljspeech/TTS/vits/vits.py
index a1fabf9ad..1c0f252dc 100644
--- a/egs/ljspeech/TTS/vits/vits.py
+++ b/egs/ljspeech/TTS/vits/vits.py
@@ -410,7 +410,7 @@ class VITS(nn.Module):
             p = self.discriminator(speech_)

         # calculate losses
-        with autocast(enabled=False):
+        with autocast("cuda", enabled=False):
             if not return_sample:
                 mel_loss = self.mel_loss(speech_hat_, speech_)
             else:
@@ -518,7 +518,7 @@ class VITS(nn.Module):
         p = self.discriminator(speech_)

         # calculate losses
-        with autocast(enabled=False):
+        with autocast("cuda", enabled=False):
             real_loss, fake_loss = self.discriminator_adv_loss(p_hat, p)
             loss = real_loss + fake_loss

diff --git a/egs/vctk/TTS/vits/train.py b/egs/vctk/TTS/vits/train.py
index 16518d65e..6249640d4 100755
--- a/egs/vctk/TTS/vits/train.py
+++ b/egs/vctk/TTS/vits/train.py
@@ -448,7 +448,7 @@ def train_one_epoch(
         loss_info["samples"] = batch_size

         try:
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 # forward discriminator
                 loss_d, stats_d = model(
                     text=tokens,
@@ -467,7 +467,7 @@ def train_one_epoch(
             scaler.scale(loss_d).backward()
             scaler.step(optimizer_d)

-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 # forward generator
                 loss_g, stats_g = model(
                     text=tokens,
@@ -740,7 +740,7 @@ def scan_pessimistic_batches_for_oom(
         ) = prepare_input(batch, tokenizer, device, speaker_map)
         try:
             # for discriminator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 loss_d, stats_d = model(
                     text=tokens,
                     text_lengths=tokens_lens,
@@ -754,7 +754,7 @@ def scan_pessimistic_batches_for_oom(
             optimizer_d.zero_grad()
             loss_d.backward()
             # for generator
-            with autocast(enabled=params.use_fp16):
+            with autocast("cuda", enabled=params.use_fp16):
                 loss_g, stats_g = model(
                     text=tokens,
                     text_lengths=tokens_lens,
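
Note: every hunk above applies the same change, replacing the CUDA-only autocast spelling with the device-agnostic one that takes the device type as its first argument. A minimal sketch of the before/after usage, assuming autocast in these files now refers to torch.amp.autocast (the corresponding import changes are not shown in these hunks) and with use_fp16 standing in for params.use_fp16:

    import torch

    use_fp16 = True  # stand-in for params.use_fp16
    device_type = "cuda" if torch.cuda.is_available() else "cpu"

    # Old spelling, deprecated on recent PyTorch (emits a FutureWarning):
    #     with torch.cuda.amp.autocast(enabled=use_fp16):
    #         ...

    # Device-agnostic spelling used by the hunks above; the device type is passed explicitly.
    with torch.amp.autocast(device_type, enabled=use_fp16):
        x = torch.randn(8, 8, device=device_type)
        y = x @ x  # matmul runs in reduced precision while autocast is enabled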