minor updates

JinZr 2023-08-03 20:24:00 +08:00
parent 3631361b95
commit 96b7c7aecf
4 changed files with 19 additions and 15 deletions

@@ -493,6 +493,8 @@ class AlignmentAttentionModule(nn.Module):
            embed_dim=pos_dim, dropout_rate=0.15
        )
        self.dropout = nn.Dropout(p=0.5)

    def forward(
        self, am_pruned: Tensor, lm_pruned: Tensor, lengths: torch.Tensor
    ) -> Tensor:
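
The hunk above only shows the new nn.Dropout(p=0.5) being registered in __init__; where it is applied inside forward is outside the shown context. A minimal sketch of the assumed placement, with illustrative names and dimensions (cross_attn, embed_dim, num_heads are not taken from the repository):

import torch
import torch.nn as nn

class AlignmentAttentionSketch(nn.Module):
    """Illustrative stand-in: cross-attention between the pruned AM and LM
    streams, with the newly added dropout on the attention output."""

    def __init__(self, embed_dim: int = 512, num_heads: int = 4):
        super().__init__()
        self.cross_attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        self.dropout = nn.Dropout(p=0.5)  # the layer added in this commit

    def forward(self, am_pruned: torch.Tensor, lm_pruned: torch.Tensor) -> torch.Tensor:
        # The AM stream attends to the LM stream (query=AM, key/value=LM).
        attn_out, _ = self.cross_attn(am_pruned, lm_pruned, lm_pruned)
        return self.dropout(attn_out)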

@@ -77,11 +77,12 @@ class Joiner(nn.Module):
        if project_input:
            logit = self.encoder_proj(encoder_out) + self.decoder_proj(decoder_out)
        else:
            if apply_attn:
                # print(attn_encoder_out)
                logit = encoder_out + decoder_out + attn_encoder_out
            else:
                # logging.info("disabling cross attn mdl")
                logging.info("disabling cross attn mdl")
                logit = encoder_out + decoder_out
        logit = self.output_linear(torch.tanh(logit))
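
Read as a whole, the branch above is the joiner's combine-then-project step. The following standalone sketch reproduces that control flow; the constructor dimensions and the optional attn_encoder_out argument are assumptions based only on this hunk:

import logging
from typing import Optional

import torch
import torch.nn as nn

class JoinerSketch(nn.Module):
    def __init__(self, encoder_dim: int, decoder_dim: int, joiner_dim: int, vocab_size: int):
        super().__init__()
        self.encoder_proj = nn.Linear(encoder_dim, joiner_dim)
        self.decoder_proj = nn.Linear(decoder_dim, joiner_dim)
        self.output_linear = nn.Linear(joiner_dim, vocab_size)

    def forward(
        self,
        encoder_out: torch.Tensor,
        decoder_out: torch.Tensor,
        attn_encoder_out: Optional[torch.Tensor] = None,
        apply_attn: bool = True,
        project_input: bool = True,
    ) -> torch.Tensor:
        if project_input:
            # Inputs still carry encoder/decoder dimensions; project them here.
            logit = self.encoder_proj(encoder_out) + self.decoder_proj(decoder_out)
        elif apply_attn:
            # Caller already projected the inputs; add the cross-attention stream.
            logit = encoder_out + decoder_out + attn_encoder_out
        else:
            logging.info("disabling cross attn mdl")
            logit = encoder_out + decoder_out
        return self.output_linear(torch.tanh(logit))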

@@ -84,6 +84,7 @@ class AsrModel(nn.Module):
        self.encoder_embed = encoder_embed
        self.encoder = encoder
        self.dropout = nn.Dropout(p=0.5)

        self.use_transducer = use_transducer
        if use_transducer:
@@ -263,7 +264,7 @@ class AsrModel(nn.Module):
            lm=self.joiner.decoder_proj(decoder_out),
            ranges=ranges,
        )
        am_pruned = self.dropout(am_pruned)

        # logits : [B, T, prune_range, vocab_size]
        # project_input=False since we applied the decoder's input projections
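
The two AsrModel hunks belong together: a dropout layer is registered in __init__ and then applied only to the acoustic (AM) stream after RNN-T pruning, before the joiner. A small sketch of that placement; the shapes below are illustrative and the pruning call itself (k2.do_rnnt_pruning in icefall) is replaced by random stand-ins:

import torch
import torch.nn as nn

# Illustrative shapes only: B utterances, T frames, prune_range symbols per
# frame, joiner_dim features (the real values come from the training config).
B, T, prune_range, joiner_dim = 2, 50, 5, 512

dropout = nn.Dropout(p=0.5)

# Stand-ins for the outputs of the pruning step.
am_pruned = torch.randn(B, T, prune_range, joiner_dim)
lm_pruned = torch.randn(B, T, prune_range, joiner_dim)

# The change in this commit: regularize only the AM stream before the joiner.
am_pruned = dropout(am_pruned)
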
@@ -274,7 +275,7 @@ class AsrModel(nn.Module):
            lm_pruned,
            None,
            encoder_out_lens,
            apply_attn=batch_idx_train > self.params.warm_step,  # True, # batch_idx_train > self.params.warm_step,
            apply_attn=True,  # batch_idx_train > self.params.warm_step,
            project_input=False,
        )
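
The two apply_attn lines above are the before/after of this change: one gates cross attention on the warm-up schedule (enabled only once batch_idx_train exceeds params.warm_step), the other keeps it enabled for every batch. A minimal sketch of the two policies (the warm_step default is illustrative):

# Warm-up gating: enable the cross-attention branch only after `warm_step`
# training batches have been seen.
def apply_attn_after_warmup(batch_idx_train: int, warm_step: int = 2000) -> bool:
    return batch_idx_train > warm_step

# Always-on variant: apply_attn is True for every batch.
def apply_attn_always(batch_idx_train: int) -> bool:
    return True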

@@ -1236,14 +1236,14 @@ def run(rank, world_size, args):
    valid_cuts += librispeech.dev_other_cuts()
    valid_dl = librispeech.valid_dataloaders(valid_cuts)

    if not params.print_diagnostics:
        scan_pessimistic_batches_for_oom(
            model=model,
            train_dl=train_dl,
            optimizer=optimizer,
            sp=sp,
            params=params,
        )
    # if not params.print_diagnostics:
    #     scan_pessimistic_batches_for_oom(
    #         model=model,
    #         train_dl=train_dl,
    #         optimizer=optimizer,
    #         sp=sp,
    #         params=params,
    #     )

    scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0)
    if checkpoints and "grad_scaler" in checkpoints:
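
In this last hunk one version of the pessimistic OOM pre-scan is commented out, and training proceeds directly to building the AMP gradient scaler. As a rough sketch of what such a pre-scan does (the real scan_pessimistic_batches_for_oom lives in the training script; the body below is an assumption):

import torch
from torch.cuda.amp import GradScaler

def oom_prescan_sketch(model: torch.nn.Module, batches, optimizer) -> None:
    """Probe the most memory-hungry batches with one forward/backward pass so
    an out-of-memory error surfaces before real training starts."""
    for batch in batches:
        optimizer.zero_grad()
        loss = model(**batch)  # assumed: the model returns a scalar loss
        loss.backward()
        optimizer.zero_grad()  # gradients are only probed, never applied

# The context lines above then build the AMP scaler unconditionally.
scaler = GradScaler(enabled=True, init_scale=1.0)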