mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 10:16:14 +00:00
update test set with wenetspeech test meeting
This commit is contained in:
parent
b26d3fa596
commit
3195a55ac7
@ -561,7 +561,8 @@ def main():
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
# test_sets_cuts = multi_dataset.test_cuts()
|
# test_sets_cuts = multi_dataset.test_cuts()
|
||||||
test_sets_cuts = multi_dataset.aishell_test_cuts()
|
# test_sets_cuts = multi_dataset.aishell_test_cuts()
|
||||||
|
test_sets_cuts = multi_dataset.wenetspeech_test_meeting_cuts()
|
||||||
|
|
||||||
test_sets = test_sets_cuts.keys()
|
test_sets = test_sets_cuts.keys()
|
||||||
test_dls = [
|
test_dls = [
|
||||||
|
@ -19,14 +19,14 @@
|
|||||||
"optimizer": {
|
"optimizer": {
|
||||||
"type": "Adam",
|
"type": "Adam",
|
||||||
"params": {
|
"params": {
|
||||||
"lr": 5e-4
|
"lr": 1e-4
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"scheduler": {
|
"scheduler": {
|
||||||
"type": "WarmupLR",
|
"type": "WarmupLR",
|
||||||
"params": {
|
"params": {
|
||||||
"warmup_min_lr": 0,
|
"warmup_min_lr": 0,
|
||||||
"warmup_max_lr": 5e-4,
|
"warmup_max_lr": 1e-4,
|
||||||
"warmup_num_steps": 100
|
"warmup_num_steps": 100
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -230,20 +230,20 @@ class MultiDataset:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"wenetspeech-meeting_test": wenetspeech_test_meeting_cuts,
|
"wenetspeech-meeting_test": wenetspeech_test_meeting_cuts,
|
||||||
# "aishell_test": aishell_test_cuts,
|
"aishell_test": aishell_test_cuts,
|
||||||
# "aishell_dev": aishell_dev_cuts,
|
"aishell_dev": aishell_dev_cuts,
|
||||||
# "ali-meeting_test": alimeeting_test_cuts,
|
"ali-meeting_test": alimeeting_test_cuts,
|
||||||
# "ali-meeting_eval": alimeeting_eval_cuts,
|
"ali-meeting_eval": alimeeting_eval_cuts,
|
||||||
# "aishell-4_test": aishell4_test_cuts,
|
"aishell-4_test": aishell4_test_cuts,
|
||||||
# "aishell-2_test": aishell2_test_cuts,
|
"aishell-2_test": aishell2_test_cuts,
|
||||||
# "aishell-2_dev": aishell2_dev_cuts,
|
"aishell-2_dev": aishell2_dev_cuts,
|
||||||
# "magicdata_test": magicdata_test_cuts,
|
"magicdata_test": magicdata_test_cuts,
|
||||||
# "magicdata_dev": magicdata_dev_cuts,
|
"magicdata_dev": magicdata_dev_cuts,
|
||||||
# "kespeech-asr_test": kespeech_test_cuts,
|
"kespeech-asr_test": kespeech_test_cuts,
|
||||||
# "kespeech-asr_dev_phase1": kespeech_dev_phase1_cuts,
|
"kespeech-asr_dev_phase1": kespeech_dev_phase1_cuts,
|
||||||
# "kespeech-asr_dev_phase2": kespeech_dev_phase2_cuts,
|
"kespeech-asr_dev_phase2": kespeech_dev_phase2_cuts,
|
||||||
# "wenetspeech-net_test": wenetspeech_test_net_cuts,
|
"wenetspeech-net_test": wenetspeech_test_net_cuts,
|
||||||
# "wenetspeech_dev": wenetspeech_dev_cuts,
|
"wenetspeech_dev": wenetspeech_dev_cuts,
|
||||||
}
|
}
|
||||||
|
|
||||||
def aishell_train_cuts(self) -> CutSet:
|
def aishell_train_cuts(self) -> CutSet:
|
||||||
@ -317,4 +317,17 @@ class MultiDataset:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"aishell2_test": aishell2_test_cuts,
|
"aishell2_test": aishell2_test_cuts,
|
||||||
|
}
|
||||||
|
|
||||||
|
def wenetspeech_test_meeting_cuts(self) -> CutSet:
|
||||||
|
logging.info("About to get multidataset test cuts")
|
||||||
|
|
||||||
|
# WeNetSpeech
|
||||||
|
logging.info("Loading WeNetSpeech set in lazy mode")
|
||||||
|
wenetspeech_test_meeting_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "wenetspeech" / "cuts_TEST_MEETING.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"wenetspeech-meeting_test": wenetspeech_test_meeting_cuts,
|
||||||
}
|
}
|
@ -823,14 +823,14 @@ def run(rank, world_size, args):
|
|||||||
# an utterance duration distribution for your dataset to select
|
# an utterance duration distribution for your dataset to select
|
||||||
# the threshold
|
# the threshold
|
||||||
if c.duration < 1.0 or c.duration > 20.0:
|
if c.duration < 1.0 or c.duration > 20.0:
|
||||||
logging.warning(
|
# logging.warning(
|
||||||
f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
|
# f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
|
||||||
)
|
# )
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# train_cuts = multi_dataset.train_cuts()
|
train_cuts = multi_dataset.train_cuts()
|
||||||
train_cuts = multi_dataset.aishell_train_cuts()
|
# train_cuts = multi_dataset.aishell_train_cuts()
|
||||||
# train_cuts = multi_dataset.aishell2_train_cuts()
|
# train_cuts = multi_dataset.aishell2_train_cuts()
|
||||||
train_cuts = train_cuts.filter(remove_short_and_long_utt)
|
train_cuts = train_cuts.filter(remove_short_and_long_utt)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user