diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp index ea84c5876..a39ab45e2 100644 Binary files a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp differ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp index 258053f5a..2aaae08de 100644 Binary files a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp differ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py index 383a98a00..491bb6b35 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py @@ -495,6 +495,7 @@ class Data2VecAudioModel(BaseFairseqModel): features = torch.cat([prompt, features], dim=1) prompt_padding_mask = torch.zeros(prompt.size()[0], prompt.size()[1]).type(torch.BoolTensor).to(features.device) padding_mask = torch.cat([prompt_padding_mask, padding_mask], dim=1) + print(padding_mask) features = self.layer_norm(features)