diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp index b0e6eaa25..dc7325cf1 100644 Binary files a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp differ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp index a7fc329d1..60ab5b44d 100644 Binary files a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.model.py.swp differ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py index 25d1c8699..3a9228396 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py @@ -492,9 +492,12 @@ class Data2VecAudioModel(BaseFairseqModel): if prompt is not None: #features = torch.cat([features, prompt]) prompt = prompt.expand((features.size()[0], prompt.size()[0], prompt.size()[1])) + print(prompt.size()) features = torch.cat([prompt, features]) + print(features.size()) prompt_padding_mask = torch.zeros(promt.size()).type(torch.BoolTensor).to(features.device) padding_mask = torch.cat([prompt_padding_mask, padding_mask]) + print(padding_mask.size()) features = self.layer_norm(features)