minor updates

JinZr 2023-08-03 20:24:00 +08:00
parent 3631361b95
commit 96b7c7aecf
4 changed files with 19 additions and 15 deletions

View File

@@ -493,6 +493,8 @@ class AlignmentAttentionModule(nn.Module):
             embed_dim=pos_dim, dropout_rate=0.15
         )
 
+        self.dropout = nn.Dropout(p=0.5)
+
     def forward(
         self, am_pruned: Tensor, lm_pruned: Tensor, lengths: torch.Tensor
     ) -> Tensor:
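For context, a minimal sketch of how a dropout layer like the one added here could act in the module's forward pass. The toy module, dimensions, and attention call below are illustrative assumptions, not code from this repository; only nn.Dropout(p=0.5) mirrors the diff.

import torch
import torch.nn as nn

class ToyAlignmentAttention(nn.Module):
    # Hypothetical stand-in for AlignmentAttentionModule.
    def __init__(self, dim: int = 256, num_heads: int = 4):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, num_heads, batch_first=True)
        self.dropout = nn.Dropout(p=0.5)  # the layer added in the diff

    def forward(self, am_pruned: torch.Tensor, lm_pruned: torch.Tensor) -> torch.Tensor:
        # Cross-attend the acoustic stream (query) over the label stream
        # (key/value), then randomly zero half the activations in training
        # mode to regularize the fused representation.
        out, _ = self.attn(am_pruned, lm_pruned, lm_pruned)
        return self.dropout(out)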

View File

@@ -77,11 +77,12 @@ class Joiner(nn.Module):
         if project_input:
             logit = self.encoder_proj(encoder_out) + self.decoder_proj(decoder_out)
-        if apply_attn:
-            logit = encoder_out + decoder_out + attn_encoder_out
-        else:
-            # logging.info("disabling cross attn mdl")
-            logit = encoder_out + decoder_out
+        else:
+            if apply_attn:
+                # print(attn_encoder_out)
+                logit = encoder_out + decoder_out + attn_encoder_out
+            else:
+                logging.info("disabling cross attn mdl")
+                logit = encoder_out + decoder_out
 
         logit = self.output_linear(torch.tanh(logit))
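For clarity, the joiner's combination rule after this change can be read as the small function below. The tensor names are taken from the diff, but the standalone function is only an illustrative sketch, not the project's API.

import logging
import torch

def combine(encoder_out: torch.Tensor,
            decoder_out: torch.Tensor,
            attn_encoder_out: torch.Tensor,
            apply_attn: bool) -> torch.Tensor:
    # With cross attention enabled, the attended encoder stream is summed in;
    # otherwise the joiner falls back to the plain transducer combination.
    if apply_attn:
        return encoder_out + decoder_out + attn_encoder_out
    logging.info("disabling cross attn mdl")
    return encoder_out + decoder_out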

View File

@@ -84,6 +84,7 @@ class AsrModel(nn.Module):
         self.encoder_embed = encoder_embed
         self.encoder = encoder
+        self.dropout = nn.Dropout(p=0.5)
 
         self.use_transducer = use_transducer
         if use_transducer:
@@ -263,7 +264,7 @@ class AsrModel(nn.Module):
             lm=self.joiner.decoder_proj(decoder_out),
             ranges=ranges,
         )
-
+        am_pruned = self.dropout(am_pruned)
 
         # logits : [B, T, prune_range, vocab_size]
         # project_input=False since we applied the decoder's input projections
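As a sketch, the added line drops activations in the pruned acoustic stream right before the joiner. The shape below is an assumption based on the (B, T, prune_range, ...) comment in the surrounding code; only p=0.5 comes from the diff.

import torch
import torch.nn as nn

dropout = nn.Dropout(p=0.5)
# Assumed shape (B, T, prune_range, joiner_dim); the numbers are placeholders.
am_pruned = torch.randn(8, 100, 5, 512)
# In training mode ~50% of entries are zeroed and the rest scaled by 2;
# in eval mode dropout is the identity.
am_pruned = dropout(am_pruned)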
@@ -274,7 +275,7 @@ class AsrModel(nn.Module):
             lm_pruned,
             None,
             encoder_out_lens,
-            apply_attn=batch_idx_train > self.params.warm_step,  # True,  # batch_idx_train > self.params.warm_step,
+            apply_attn=True,  # batch_idx_train > self.params.warm_step,
             project_input=False,
         )
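The expression preserved in the trailing comment is the warm-up gate this commit replaces with a constant True. A hedged sketch of that previous gating logic, with the function name invented for illustration:

def should_apply_attn(batch_idx_train: int, warm_step: int) -> bool:
    # Previous behaviour: keep the cross-attention branch off until
    # warm_step training batches have been seen, so early training
    # relies on the plain encoder + decoder sum.
    return batch_idx_train > warm_step

After this commit the call site passes apply_attn=True unconditionally, so the joiner's attention path is active from the first batch.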

View File

@@ -1236,14 +1236,14 @@ def run(rank, world_size, args):
         valid_cuts += librispeech.dev_other_cuts()
     valid_dl = librispeech.valid_dataloaders(valid_cuts)
 
-    if not params.print_diagnostics:
-        scan_pessimistic_batches_for_oom(
-            model=model,
-            train_dl=train_dl,
-            optimizer=optimizer,
-            sp=sp,
-            params=params,
-        )
+    # if not params.print_diagnostics:
+    #     scan_pessimistic_batches_for_oom(
+    #         model=model,
+    #         train_dl=train_dl,
+    #         optimizer=optimizer,
+    #         sp=sp,
+    #         params=params,
+    #     )
 
     scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0)
     if checkpoints and "grad_scaler" in checkpoints:
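For context, the disabled helper is a pre-flight check. The sketch below shows the general idea under stated assumptions; the real icefall function's signature and internals differ, and model(**batch) returning a scalar loss is an assumption for illustration.

import torch

def scan_for_oom_sketch(model, batches, optimizer) -> None:
    # Run the most memory-hungry batches through one forward/backward pass
    # so a CUDA out-of-memory error surfaces immediately rather than hours
    # into training.
    for batch in batches:
        optimizer.zero_grad()
        loss = model(**batch)
        loss.backward()
        optimizer.zero_grad()

Commenting the call out speeds up start-up at the cost of discovering any OOM only once real training batches arrive.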