Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-09-19 05:54:20 +00:00)

Commit 96b7c7aecf (parent 3631361b95): minor updates
@@ -493,6 +493,8 @@ class AlignmentAttentionModule(nn.Module):
             embed_dim=pos_dim, dropout_rate=0.15
         )
 
+        self.dropout = nn.Dropout(p=0.5)
+
     def forward(
         self, am_pruned: Tensor, lm_pruned: Tensor, lengths: torch.Tensor
     ) -> Tensor:
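The hunk above only shows the new dropout layer being registered in __init__; the diff does not show where it is applied. A minimal sketch of the usual pattern, assuming (this is not the repository's code) that the dropout regularizes the module's cross-attention output in forward:

    import torch
    import torch.nn as nn
    from torch import Tensor

    class AlignmentAttentionSketch(nn.Module):
        # Hypothetical stand-in for AlignmentAttentionModule; only the
        # `self.dropout = nn.Dropout(p=0.5)` line mirrors the diff.
        def __init__(self) -> None:
            super().__init__()
            self.dropout = nn.Dropout(p=0.5)  # added in this commit

        def forward(self, am_pruned: Tensor, lm_pruned: Tensor, lengths: torch.Tensor) -> Tensor:
            # Placeholder for the real alignment cross-attention between the
            # pruned acoustic (am) and label (lm) streams.
            attn_out = am_pruned + lm_pruned
            # Assumed usage: dropout is applied to the attention output during training.
            return self.dropout(attn_out)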
@@ -77,11 +77,12 @@ class Joiner(nn.Module):
 
         if project_input:
             logit = self.encoder_proj(encoder_out) + self.decoder_proj(decoder_out)
         else:
             if apply_attn:
                 # print(attn_encoder_out)
                 logit = encoder_out + decoder_out + attn_encoder_out
             else:
-                # logging.info("disabling cross attn mdl")
+                logging.info("disabling cross attn mdl")
                 logit = encoder_out + decoder_out
 
         logit = self.output_linear(torch.tanh(logit))
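To make the branch logic readable outside the diff, here is a self-contained sketch of a joiner with the same control flow; the projection layers, dimensions, and signature are assumptions, and only the if/else structure and the logging call mirror the diff:

    import logging
    import torch
    import torch.nn as nn

    class JoinerSketch(nn.Module):
        # Hypothetical joiner; encoder_proj/decoder_proj/output_linear names follow the diff.
        def __init__(self, encoder_dim: int, decoder_dim: int, joiner_dim: int, vocab_size: int) -> None:
            super().__init__()
            self.encoder_proj = nn.Linear(encoder_dim, joiner_dim)
            self.decoder_proj = nn.Linear(decoder_dim, joiner_dim)
            self.output_linear = nn.Linear(joiner_dim, vocab_size)

        def forward(
            self,
            encoder_out: torch.Tensor,
            decoder_out: torch.Tensor,
            attn_encoder_out: torch.Tensor = None,
            apply_attn: bool = True,
            project_input: bool = True,
        ) -> torch.Tensor:
            if project_input:
                # Inputs are still in encoder/decoder dims; project them first.
                logit = self.encoder_proj(encoder_out) + self.decoder_proj(decoder_out)
            else:
                if apply_attn and attn_encoder_out is not None:
                    # The cross-attention output is fused in as a third additive term.
                    logit = encoder_out + decoder_out + attn_encoder_out
                else:
                    logging.info("disabling cross attn mdl")
                    logit = encoder_out + decoder_out
            return self.output_linear(torch.tanh(logit))

With project_input=False the caller is expected to have already applied encoder_proj/decoder_proj, which is how the pruned-RNNT path in AsrModel calls the joiner in the hunks below.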
@@ -84,6 +84,7 @@ class AsrModel(nn.Module):
 
         self.encoder_embed = encoder_embed
         self.encoder = encoder
+        self.dropout = nn.Dropout(p=0.5)
 
         self.use_transducer = use_transducer
         if use_transducer:
@@ -263,7 +264,7 @@ class AsrModel(nn.Module):
             lm=self.joiner.decoder_proj(decoder_out),
             ranges=ranges,
         )
 
+        am_pruned = self.dropout(am_pruned)
         # logits : [B, T, prune_range, vocab_size]
 
         # project_input=False since we applied the decoder's input projections
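Taken together with the __init__ change above, the effect is that dropout is applied to the pruned acoustic stream only, before it reaches the joiner. A small illustrative fragment (shapes, and all names other than am_pruned and the dropout call, are assumed):

    import torch
    import torch.nn as nn

    dropout = nn.Dropout(p=0.5)                 # created once, as in AsrModel.__init__
    am_pruned = torch.randn(2, 100, 5, 512)     # [B, T, prune_range, joiner_dim], shapes assumed
    lm_pruned = torch.randn(2, 100, 5, 512)

    am_pruned = dropout(am_pruned)              # mirrors: am_pruned = self.dropout(am_pruned)
    # lm_pruned is passed to the joiner unchanged, per the diff.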
@@ -274,7 +275,7 @@ class AsrModel(nn.Module):
             lm_pruned,
             None,
             encoder_out_lens,
-            apply_attn=batch_idx_train > self.params.warm_step,  # True, # batch_idx_train > self.params.warm_step,
+            apply_attn=True,  # batch_idx_train > self.params.warm_step,
             project_input=False,
         )
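The replaced keyword argument switches between two ways of gating the cross-attention branch: a warm-up schedule that enables it only after params.warm_step training batches, and an unconditional True. A toy illustration of the scheduled variant (values are made up):

    warm_step = 2000            # assumed value; in train.py this would be params.warm_step
    for batch_idx_train in (0, 1999, 2001):
        apply_attn = batch_idx_train > warm_step   # the gating expression replaced by True in this commit
        print(batch_idx_train, apply_attn)          # -> False, False, True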
@@ -1236,14 +1236,14 @@ def run(rank, world_size, args):
     valid_cuts += librispeech.dev_other_cuts()
     valid_dl = librispeech.valid_dataloaders(valid_cuts)
 
-    if not params.print_diagnostics:
-        scan_pessimistic_batches_for_oom(
-            model=model,
-            train_dl=train_dl,
-            optimizer=optimizer,
-            sp=sp,
-            params=params,
-        )
+    # if not params.print_diagnostics:
+    #     scan_pessimistic_batches_for_oom(
+    #         model=model,
+    #         train_dl=train_dl,
+    #         optimizer=optimizer,
+    #         sp=sp,
+    #         params=params,
+    #     )
 
     scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0)
     if checkpoints and "grad_scaler" in checkpoints:
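The commented-out block disables the pre-training OOM scan; the GradScaler line that follows is unchanged. For reference, a minimal sketch of how a GradScaler like the one created here is normally driven in a mixed-precision training step (model, optimizer, and compute_loss are placeholders, not icefall's code):

    import torch
    from torch.cuda.amp import GradScaler, autocast

    scaler = GradScaler(enabled=True, init_scale=1.0)

    def train_step(model, optimizer, batch, compute_loss):
        optimizer.zero_grad()
        with autocast(enabled=True):
            loss = compute_loss(model, batch)
        scaler.scale(loss).backward()   # scale the loss so fp16 gradients do not underflow
        scaler.step(optimizer)          # unscales gradients; skips the update if infs/NaNs appear
        scaler.update()                 # adjusts the scale factor for the next iteration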