Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-09-19 05:54:20 +00:00)
minor updates
This commit is contained in:
parent 3631361b95
commit 96b7c7aecf

@@ -493,6 +493,8 @@ class AlignmentAttentionModule(nn.Module):
             embed_dim=pos_dim, dropout_rate=0.15
         )
 
+        self.dropout = nn.Dropout(p=0.5)
+
     def forward(
         self, am_pruned: Tensor, lm_pruned: Tensor, lengths: torch.Tensor
     ) -> Tensor:

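The added module is plain PyTorch dropout. As a quick illustration (a standalone sketch, not taken from the recipe), this is how nn.Dropout(p=0.5) behaves in training versus evaluation mode:

import torch
import torch.nn as nn

# Illustrative sketch only: p=0.5 zeroes roughly half of the activations during
# training and rescales the survivors by 1/(1 - p); in eval mode it is a no-op.
dropout = nn.Dropout(p=0.5)
x = torch.ones(2, 4)

dropout.train()
print(dropout(x))  # entries are either 0.0 or 2.0 (scaled by 1 / (1 - 0.5))

dropout.eval()
print(dropout(x))  # all ones: dropout is disabled at inference time
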
@@ -77,11 +77,12 @@ class Joiner(nn.Module):
 
         if project_input:
             logit = self.encoder_proj(encoder_out) + self.decoder_proj(decoder_out)
         else:
             if apply_attn:
+                # print(attn_encoder_out)
                 logit = encoder_out + decoder_out + attn_encoder_out
             else:
-                # logging.info("disabling cross attn mdl")
+                logging.info("disabling cross attn mdl")
                 logit = encoder_out + decoder_out
 
         logit = self.output_linear(torch.tanh(logit))

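For readers skimming the joiner change: a rough, self-contained sketch of the combination performed when project_input=False, with apply_attn toggling whether the alignment-attention output is added. The shapes, dimensions, and output_linear layer below are illustrative assumptions, not the recipe's actual configuration.

import torch
import torch.nn as nn

# Sketch of the joiner combination above (assumed shapes, not the recipe's module).
def combine(encoder_out, decoder_out, attn_encoder_out, output_linear, apply_attn=True):
    # Sum the already-projected streams, optionally adding the
    # alignment-attention output, then apply tanh and the output projection.
    if apply_attn:
        logit = encoder_out + decoder_out + attn_encoder_out
    else:
        logit = encoder_out + decoder_out
    return output_linear(torch.tanh(logit))

joiner_dim, vocab_size = 512, 500
output_linear = nn.Linear(joiner_dim, vocab_size)
e = torch.randn(2, 10, 5, joiner_dim)  # (batch, T, prune_range, joiner_dim)
d = torch.randn(2, 10, 5, joiner_dim)
a = torch.randn(2, 10, 5, joiner_dim)
print(combine(e, d, a, output_linear).shape)  # torch.Size([2, 10, 5, 500])
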
@@ -84,6 +84,7 @@ class AsrModel(nn.Module):
 
         self.encoder_embed = encoder_embed
         self.encoder = encoder
+        self.dropout = nn.Dropout(p=0.5)
 
         self.use_transducer = use_transducer
         if use_transducer:

@@ -263,7 +264,7 @@ class AsrModel(nn.Module):
             lm=self.joiner.decoder_proj(decoder_out),
             ranges=ranges,
         )
-
+        am_pruned = self.dropout(am_pruned)
         # logits : [B, T, prune_range, vocab_size]
 
         # project_input=False since we applied the decoder's input projections

@@ -274,7 +275,7 @@ class AsrModel(nn.Module):
             lm_pruned,
             None,
             encoder_out_lens,
-            apply_attn=batch_idx_train > self.params.warm_step, # True, # batch_idx_train > self.params.warm_step,
+            apply_attn=True,  # batch_idx_train > self.params.warm_step,
             project_input=False,
         )
 

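The replaced argument previously tied cross-attention to the warm-up schedule: the gate only opened after warm_step training batches. A tiny sketch of that gating pattern (the parameter names here are assumptions mirroring the line above):

def attn_enabled(batch_idx_train: int, warm_step: int, always_on: bool) -> bool:
    # Old behaviour: enable cross-attention only after the warm-up period.
    # New behaviour (always_on=True): the warm-up gate is ignored.
    return True if always_on else batch_idx_train > warm_step

assert attn_enabled(100, 2000, always_on=False) is False
assert attn_enabled(3000, 2000, always_on=False) is True
assert attn_enabled(100, 2000, always_on=True) is True
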
@@ -1236,14 +1236,14 @@ def run(rank, world_size, args):
         valid_cuts += librispeech.dev_other_cuts()
     valid_dl = librispeech.valid_dataloaders(valid_cuts)
 
-    if not params.print_diagnostics:
-        scan_pessimistic_batches_for_oom(
-            model=model,
-            train_dl=train_dl,
-            optimizer=optimizer,
-            sp=sp,
-            params=params,
-        )
+    # if not params.print_diagnostics:
+    #     scan_pessimistic_batches_for_oom(
+    #         model=model,
+    #         train_dl=train_dl,
+    #         optimizer=optimizer,
+    #         sp=sp,
+    #         params=params,
+    #     )
 
     scaler = GradScaler(enabled=params.use_fp16, init_scale=1.0)
     if checkpoints and "grad_scaler" in checkpoints:

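The GradScaler construction retained above is the standard torch.cuda.amp pattern. A minimal, illustrative training-step sketch (assumes a CUDA device and a toy model; not the recipe's training loop):

import torch
from torch.cuda.amp import GradScaler, autocast

model = torch.nn.Linear(10, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
use_fp16 = True

# As above: scaling is a no-op when enabled=False.
scaler = GradScaler(enabled=use_fp16, init_scale=1.0)

for _ in range(3):
    x = torch.randn(8, 10, device="cuda")
    y = torch.randint(0, 2, (8,), device="cuda")
    optimizer.zero_grad()
    with autocast(enabled=use_fp16):
        loss = torch.nn.functional.cross_entropy(model(x), y)
    scaler.scale(loss).backward()  # scale the loss to avoid fp16 gradient underflow
    scaler.step(optimizer)         # unscales gradients, then calls optimizer.step()
    scaler.update()                # adjusts the scale factor for the next iteration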