diff --git a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_xlarge.py b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_xlarge.py index e25f9b0af..8b609fdc5 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_xlarge.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless6/hubert_xlarge.py @@ -178,9 +178,7 @@ class HubertXlargeFineTuned: N = encoder_embedding.shape[0] assert len(cut_list) == N # 320 is from: 16,000 / 50 = sample_rate / hbuert output frame rate - num_frames = [ - supervisions["num_samples"][i].item() // 320 for i in range(N) - ] + num_frames = (supervisions["num_samples"] // 320).tolist() return encoder_embedding, num_frames def ctc_greedy_search(self, batch):