From 0e8c1db4d0be15d126147f6a0334b168989d24a7 Mon Sep 17 00:00:00 2001
From: root
Date: Thu, 15 May 2025 22:45:04 -0700
Subject: [PATCH] fix speed perturb issue

---
 egs/speech_llm/SPEECH2SPEECH/qwen_omni/data_module.py | 7 ++++---
 egs/speech_llm/SPEECH2SPEECH/qwen_omni/train.py       | 6 +++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/egs/speech_llm/SPEECH2SPEECH/qwen_omni/data_module.py b/egs/speech_llm/SPEECH2SPEECH/qwen_omni/data_module.py
index 1f35f9b84..72f26a803 100644
--- a/egs/speech_llm/SPEECH2SPEECH/qwen_omni/data_module.py
+++ b/egs/speech_llm/SPEECH2SPEECH/qwen_omni/data_module.py
@@ -632,9 +632,10 @@ class AsrDataModule:
     @lru_cache()
     def train_cuts_librispeech(self) -> CutSet:
         logging.info("About to get train cuts")
-
-        # librispeech_path="fixie-ai/librispeech_asr"
-        librispeech_path = "/workspace/slam/librispeech_asr"
+        if self.args.huggingface_dataset_path_or_name is not None:
+            librispeech_path = self.args.huggingface_dataset_path_or_name + "/librispeech_asr"
+        else:
+            librispeech_path = "fixie-ai/librispeech_asr"
         # 148_688
         librispeech_other = load_dataset(
             librispeech_path, "other", split="train.500", streaming=True
diff --git a/egs/speech_llm/SPEECH2SPEECH/qwen_omni/train.py b/egs/speech_llm/SPEECH2SPEECH/qwen_omni/train.py
index ba9e97577..c523c92a5 100755
--- a/egs/speech_llm/SPEECH2SPEECH/qwen_omni/train.py
+++ b/egs/speech_llm/SPEECH2SPEECH/qwen_omni/train.py
@@ -867,7 +867,7 @@ def run(rank, world_size, args):
         # You should use ../local/display_manifest_statistics.py to get
         # an utterance duration distribution for your dataset to select
         # the threshold
-        if c.duration < 1.0 or c.duration > 29.0:
+        if c.duration < 1.0 or c.duration > 25.0:
             logging.warning(
                 f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
             )
@@ -892,9 +892,9 @@ def run(rank, world_size, args):
         train_cuts = data_module.train_cuts_en_vocalnet()
         valid_cuts = data_module.valid_cuts_en_vocalnet()
     elif params.dataset_format == "speech_continuation":
-        # train_cuts = data_module.train_cuts_ultravox()
+        train_cuts = data_module.train_cuts_ultravox()
         # train_cuts = data_module.train_cuts_gigaspeech()
-        train_cuts = data_module.train_cuts_librispeech()
+        # train_cuts = data_module.train_cuts_librispeech()
         valid_cuts = data_module.valid_cuts_ultravox()
     else:
         raise ValueError(f"Unknown dataset format: {params.dataset_format}")