diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp index 5571c93cb..fdca8c5ac 100644 Binary files a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.data2vec_audio.py.swp differ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py index 71a2fee14..8cae787ab 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/data2vec_audio.py @@ -467,7 +467,8 @@ class Data2VecAudioModel(BaseFairseqModel): ## for prompt tuning if prompt is not None: - features = torch.cat([features, prompt]) + #features = torch.cat([features, prompt]) + features = torch.cat([prompt, features]) features = self.layer_norm(features) @@ -494,12 +495,12 @@ class Data2VecAudioModel(BaseFairseqModel): else: padding_mask = None - print(padding_mask.size()) - print((padding_mask[0] == True).nonzero(as_tuple=True)[0]) - print((padding_mask[1] == True).nonzero(as_tuple=True)[0][1]) - print((padding_mask[2] == True).nonzero(as_tuple=True)[0][2]) - print((padding_mask[3] == True).nonzero(as_tuple=True)[0][3]) - exit() + #print(padding_mask.size()) + #print((padding_mask[0] == True).nonzero(as_tuple=True)[0]) + #print((padding_mask[1] == True).nonzero(as_tuple=True)[0][1]) + #print((padding_mask[2] == True).nonzero(as_tuple=True)[0][2]) + #print((padding_mask[3] == True).nonzero(as_tuple=True)[0][3]) + #exit() if self.post_extract_proj is not None: features = self.post_extract_proj(features)