diff --git a/egs/librispeech/ASR/conv_emformer_transducer/decode.py b/egs/librispeech/ASR/conv_emformer_transducer/decode.py
index c40b01dfa..47b4f9fd0 100755
--- a/egs/librispeech/ASR/conv_emformer_transducer/decode.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer/decode.py
@@ -480,6 +480,7 @@ def main():
 
     # <blk> is defined in local/train_bpe_model.py
     params.blank_id = sp.piece_to_id("<blk>")
+    params.unk_id = sp.piece_to_id("<unk>")
     params.vocab_size = sp.get_piece_size()
 
     logging.info(params)
diff --git a/egs/librispeech/ASR/conv_emformer_transducer/emformer.py b/egs/librispeech/ASR/conv_emformer_transducer/emformer.py
index fca949e42..3c520f5c3 100644
--- a/egs/librispeech/ASR/conv_emformer_transducer/emformer.py
+++ b/egs/librispeech/ASR/conv_emformer_transducer/emformer.py
@@ -729,7 +729,7 @@ class EmformerLayer(nn.Module):
         lengths: torch.Tensor,
         memory: torch.Tensor,
         pos_emb: torch.Tensor,
-        attention_mask: Optional[torch.Tensor],
+        attention_mask: Optional[torch.Tensor] = None,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         """Apply attention module in non-infer mode."""
         if attention_mask is None:
@@ -897,8 +897,8 @@ class EmformerLayer(nn.Module):
             right_context_end_idx,
             lengths,
             memory,
-            attention_mask,
             pos_emb,
+            attention_mask,
         )
         right_context_utterance = self._apply_conv_module_forward(
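
For context, a minimal sketch (not part of the patch; the parameter list is abbreviated from EmformerLayer._apply_attention_module_forward) of the call-site bug the second emformer.py hunk fixes: the signature declares pos_emb before attention_mask, but the caller passed the two positionally in the opposite order, so each tensor was bound to the wrong parameter.

# Sketch only: abbreviated stand-in for the real method, to show the
# positional-argument swap. Names and shapes here are illustrative.
from typing import Optional

import torch


def apply_attention(
    lengths: torch.Tensor,
    memory: torch.Tensor,
    pos_emb: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
) -> None:
    # Report which tensor actually landed in which parameter.
    print("pos_emb shape:", tuple(pos_emb.shape))
    print(
        "attention_mask dtype:",
        None if attention_mask is None else attention_mask.dtype,
    )


lengths = torch.tensor([8])
memory = torch.zeros(2, 1, 16)
pos_emb = torch.zeros(15, 16)  # float positional embeddings
attention_mask = torch.zeros(10, 10, dtype=torch.bool)  # boolean mask

# Pre-patch call order: the bool mask binds to pos_emb and the float
# embeddings bind to attention_mask.
apply_attention(lengths, memory, attention_mask, pos_emb)

# Post-patch call order matches the signature.
apply_attention(lengths, memory, pos_emb, attention_mask)

Passing these by keyword (pos_emb=..., attention_mask=...) would make the call order-insensitive; the patch instead keeps the positional call and reorders it. The new "= None" default in the first emformer.py hunk additionally lets callers omit the mask, with the existing "if attention_mask is None:" branch handling that case.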