From 4826f0801ce9ec6c7723748f8026c5394e56de19 Mon Sep 17 00:00:00 2001
From: Yuekai Zhang
Date: Mon, 29 Jan 2024 10:08:10 +0800
Subject: [PATCH] remove utterance more than 30s in test_net

---
 egs/wenetspeech/ASR/whisper/decode.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/egs/wenetspeech/ASR/whisper/decode.py b/egs/wenetspeech/ASR/whisper/decode.py
index b6b435a89..99a09bf64 100755
--- a/egs/wenetspeech/ASR/whisper/decode.py
+++ b/egs/wenetspeech/ASR/whisper/decode.py
@@ -481,7 +481,18 @@ def main():
     dev_cuts = wenetspeech.valid_cuts()
     dev_dl = wenetspeech.valid_dataloaders(dev_cuts)
 
+    def remove_long_utt(c: Cut):
+        # Predicate passed to test_net_cuts.filter() below: returns True to
+        # keep a cut, False to drop it. Cuts longer than 30 seconds are dropped.
+        if c.duration > 30.0:
+            # logging.warning(
+            #     f"Exclude cut with ID {c.id} from decoding. Duration: {c.duration}"
+            # )
+            return False
+        return True
+
     test_net_cuts = wenetspeech.test_net_cuts()
+    test_net_cuts = test_net_cuts.filter(remove_long_utt)
     test_net_dl = wenetspeech.test_dataloaders(test_net_cuts)
 
     test_meeting_cuts = wenetspeech.test_meeting_cuts()