mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
fix speed perturb issue
This commit is contained in:
parent
bfb4ebeb83
commit
0e8c1db4d0
@ -632,9 +632,10 @@ class AsrDataModule:
|
||||
@lru_cache()
|
||||
def train_cuts_librispeech(self) -> CutSet:
|
||||
logging.info("About to get train cuts")
|
||||
|
||||
# librispeech_path="fixie-ai/librispeech_asr"
|
||||
librispeech_path = "/workspace/slam/librispeech_asr"
|
||||
if self.args.huggingface_dataset_path_or_name is not None:
|
||||
librispeech_path = self.args.huggingface_dataset_path_or_name + "/librispeech_asr"
|
||||
else:
|
||||
librispeech_path = "fixie-ai/librispeech_asr"
|
||||
# 148_688
|
||||
librispeech_other = load_dataset(
|
||||
librispeech_path, "other", split="train.500", streaming=True
|
||||
|
@ -867,7 +867,7 @@ def run(rank, world_size, args):
|
||||
# You should use ../local/display_manifest_statistics.py to get
|
||||
# an utterance duration distribution for your dataset to select
|
||||
# the threshold
|
||||
if c.duration < 1.0 or c.duration > 29.0:
|
||||
if c.duration < 1.0 or c.duration > 25.0:
|
||||
logging.warning(
|
||||
f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
|
||||
)
|
||||
@ -892,9 +892,9 @@ def run(rank, world_size, args):
|
||||
train_cuts = data_module.train_cuts_en_vocalnet()
|
||||
valid_cuts = data_module.valid_cuts_en_vocalnet()
|
||||
elif params.dataset_format == "speech_continuation":
|
||||
# train_cuts = data_module.train_cuts_ultravox()
|
||||
train_cuts = data_module.train_cuts_ultravox()
|
||||
# train_cuts = data_module.train_cuts_gigaspeech()
|
||||
train_cuts = data_module.train_cuts_librispeech()
|
||||
# train_cuts = data_module.train_cuts_librispeech()
|
||||
valid_cuts = data_module.valid_cuts_ultravox()
|
||||
else:
|
||||
raise ValueError(f"Unknown dataset format: {params.dataset_format}")
|
||||
|
Loading…
x
Reference in New Issue
Block a user