diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/.conformer_randomcombine.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/.conformer_randomcombine.py.swp index 05be579f8..785f07df7 100644 Binary files a/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/.conformer_randomcombine.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/.conformer_randomcombine.py.swp differ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/conformer_randomcombine.py b/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/conformer_randomcombine.py index 0d80f918e..121254ec6 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/conformer_randomcombine.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless_gtrans/conformer_randomcombine.py @@ -211,14 +211,25 @@ class Conformer(EncoderInterface): ) # (T, N, C) x = x.permute(1, 0, 2) # (T, N, C) ->(N, T, C) - + layer_outputs = [x.permute(1, 0, 2) for x in layer_outputs] - x = self.layer_norm(1/4*(self.sigmoid(self.alpha[0])*layer_outputs[2] + \ - self.sigmoid(self.alpha[1])*layer_outputs[5] + \ - self.sigmoid(self.alpha[2])*layer_outputs[8] + \ - self.sigmoid(self.alpha[3])*layer_outputs[11] + if self.group_num == 4: + x = self.layer_norm(1/4*(self.sigmoid(self.alpha[0])*layer_outputs[2] + \ + self.sigmoid(self.alpha[1])*layer_outputs[5] + \ + self.sigmoid(self.alpha[2])*layer_outputs[8] + \ + self.sigmoid(self.alpha[3])*layer_outputs[11] + ) ) - ) + elif self.group_num == 6: + x = self.layer_norm(1/6*(self.sigmoid(self.alpha[0])*layer_outputs[1] + \ + self.sigmoid(self.alpha[1])*layer_outputs[3] + \ + self.sigmoid(self.alpha[2])*layer_outputs[5] + \ + self.sigmoid(self.alpha[3])*layer_outputs[7] + \ + self.sigmoid(self.alpha[4])*layer_outputs[9] + \ + self.sigmoid(self.alpha[5])*layer_outputs[11] + ) + ) + ''' x = self.layer_norm(1/12*(self.sigmoid(self.alpha[0])*layer_output[0] + \