mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-10 18:42:19 +00:00
fix speed perturb issue
This commit is contained in:
parent
bfb4ebeb83
commit
0e8c1db4d0
@ -632,9 +632,10 @@ class AsrDataModule:
|
|||||||
@lru_cache()
|
@lru_cache()
|
||||||
def train_cuts_librispeech(self) -> CutSet:
|
def train_cuts_librispeech(self) -> CutSet:
|
||||||
logging.info("About to get train cuts")
|
logging.info("About to get train cuts")
|
||||||
|
if self.args.huggingface_dataset_path_or_name is not None:
|
||||||
# librispeech_path="fixie-ai/librispeech_asr"
|
librispeech_path = self.args.huggingface_dataset_path_or_name + "/librispeech_asr"
|
||||||
librispeech_path = "/workspace/slam/librispeech_asr"
|
else:
|
||||||
|
librispeech_path = "fixie-ai/librispeech_asr"
|
||||||
# 148_688
|
# 148_688
|
||||||
librispeech_other = load_dataset(
|
librispeech_other = load_dataset(
|
||||||
librispeech_path, "other", split="train.500", streaming=True
|
librispeech_path, "other", split="train.500", streaming=True
|
||||||
|
@ -867,7 +867,7 @@ def run(rank, world_size, args):
|
|||||||
# You should use ../local/display_manifest_statistics.py to get
|
# You should use ../local/display_manifest_statistics.py to get
|
||||||
# an utterance duration distribution for your dataset to select
|
# an utterance duration distribution for your dataset to select
|
||||||
# the threshold
|
# the threshold
|
||||||
if c.duration < 1.0 or c.duration > 29.0:
|
if c.duration < 1.0 or c.duration > 25.0:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
|
f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
|
||||||
)
|
)
|
||||||
@ -892,9 +892,9 @@ def run(rank, world_size, args):
|
|||||||
train_cuts = data_module.train_cuts_en_vocalnet()
|
train_cuts = data_module.train_cuts_en_vocalnet()
|
||||||
valid_cuts = data_module.valid_cuts_en_vocalnet()
|
valid_cuts = data_module.valid_cuts_en_vocalnet()
|
||||||
elif params.dataset_format == "speech_continuation":
|
elif params.dataset_format == "speech_continuation":
|
||||||
# train_cuts = data_module.train_cuts_ultravox()
|
train_cuts = data_module.train_cuts_ultravox()
|
||||||
# train_cuts = data_module.train_cuts_gigaspeech()
|
# train_cuts = data_module.train_cuts_gigaspeech()
|
||||||
train_cuts = data_module.train_cuts_librispeech()
|
# train_cuts = data_module.train_cuts_librispeech()
|
||||||
valid_cuts = data_module.valid_cuts_ultravox()
|
valid_cuts = data_module.valid_cuts_ultravox()
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown dataset format: {params.dataset_format}")
|
raise ValueError(f"Unknown dataset format: {params.dataset_format}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user