mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-10 17:44:20 +00:00
refined
This commit is contained in:
parent
a91c90636b
commit
24296d8d48
@ -125,6 +125,7 @@ from icefall.checkpoint import (
|
|||||||
find_checkpoints,
|
find_checkpoints,
|
||||||
load_checkpoint,
|
load_checkpoint,
|
||||||
)
|
)
|
||||||
|
from lhotse.cut import Cut
|
||||||
from icefall.lexicon import Lexicon
|
from icefall.lexicon import Lexicon
|
||||||
from icefall.utils import (
|
from icefall.utils import (
|
||||||
AttributeDict,
|
AttributeDict,
|
||||||
@ -792,11 +793,19 @@ def main():
|
|||||||
# test_clean_dl = librispeech.test_dataloaders(test_clean_cuts)
|
# test_clean_dl = librispeech.test_dataloaders(test_clean_cuts)
|
||||||
# test_other_dl = librispeech.test_dataloaders(test_other_cuts)
|
# test_other_dl = librispeech.test_dataloaders(test_other_cuts)
|
||||||
|
|
||||||
|
def remove_short_utt(c: Cut):
|
||||||
|
T = ((c.num_frames - 7) // 2 + 1) // 2
|
||||||
|
if T <= 0:
|
||||||
|
logging.warning(
|
||||||
|
f"Excluding cut with ID: {c.id} from decoding, num_frames: {c.num_frames}"
|
||||||
|
)
|
||||||
|
return T > 0
|
||||||
|
|
||||||
test_sets_cuts = multi_dataset.test_cuts()
|
test_sets_cuts = multi_dataset.test_cuts()
|
||||||
|
|
||||||
test_sets = test_sets_cuts.keys()
|
test_sets = test_sets_cuts.keys()
|
||||||
test_dl = [
|
test_dl = [
|
||||||
librispeech.test_dataloaders(test_sets_cuts[cuts_name])
|
librispeech.test_dataloaders(test_sets_cuts[cuts_name].filter(remove_short_utt))
|
||||||
for cuts_name in test_sets
|
for cuts_name in test_sets
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -221,22 +221,31 @@ class MultiDataset:
|
|||||||
logging.info("About to get multidataset test cuts")
|
logging.info("About to get multidataset test cuts")
|
||||||
|
|
||||||
# Aidatatang_200zh
|
# Aidatatang_200zh
|
||||||
logging.info("Loading Aidatatang_200zh TEST set in lazy mode")
|
logging.info("Loading Aidatatang_200zh set in lazy mode")
|
||||||
aidatatang_test_cuts = load_manifest_lazy(
|
aidatatang_test_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "aidatatang_cuts_test.jsonl.gz"
|
self.fbank_dir / "aidatatang_cuts_test.jsonl.gz"
|
||||||
)
|
)
|
||||||
|
aidatatang_dev_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "aidatatang_cuts_dev.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
# AISHELL
|
# AISHELL
|
||||||
logging.info("Loading Aishell TEST set in lazy mode")
|
logging.info("Loading Aishell set in lazy mode")
|
||||||
aishell_test_cuts = load_manifest_lazy(
|
aishell_test_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "aishell_cuts_test.jsonl.gz"
|
self.fbank_dir / "aishell_cuts_test.jsonl.gz"
|
||||||
)
|
)
|
||||||
|
aishell_dev_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "aishell_cuts_dev.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
# AISHELL-2
|
# AISHELL-2
|
||||||
logging.info("Loading Aishell-2 TEST set in lazy mode")
|
logging.info("Loading Aishell-2 set in lazy mode")
|
||||||
aishell2_test_cuts = load_manifest_lazy(
|
aishell2_test_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "aishell2_cuts_test.jsonl.gz"
|
self.fbank_dir / "aishell2_cuts_test.jsonl.gz"
|
||||||
)
|
)
|
||||||
|
aishell2_dev_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "aishell2_cuts_dev.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
# AISHELL-4
|
# AISHELL-4
|
||||||
logging.info("Loading Aishell-4 TEST set in lazy mode")
|
logging.info("Loading Aishell-4 TEST set in lazy mode")
|
||||||
@ -245,40 +254,63 @@ class MultiDataset:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Ali-Meeting
|
# Ali-Meeting
|
||||||
logging.info("Loading Ali-Meeting TEST set in lazy mode")
|
logging.info("Loading Ali-Meeting set in lazy mode")
|
||||||
alimeeting_test_cuts = load_manifest_lazy(
|
alimeeting_test_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "alimeeting-far_cuts_test.jsonl.gz"
|
self.fbank_dir / "alimeeting-far_cuts_test.jsonl.gz"
|
||||||
)
|
)
|
||||||
|
alimeeting_eval_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "alimeeting-far_cuts_eval.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
# MagicData
|
# MagicData
|
||||||
logging.info("Loading MagicData TEST set in lazy mode")
|
logging.info("Loading MagicData set in lazy mode")
|
||||||
magicdata_test_cuts = load_manifest_lazy(
|
magicdata_test_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "magicdata_cuts_test.jsonl.gz"
|
self.fbank_dir / "magicdata_cuts_test.jsonl.gz"
|
||||||
)
|
)
|
||||||
|
magicdata_dev_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "magicdata_cuts_dev.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
# KeSpeech
|
# KeSpeech
|
||||||
logging.info("Loading KeSpeech TEST set in lazy mode")
|
logging.info("Loading KeSpeech set in lazy mode")
|
||||||
kespeech_test_cuts = load_manifest_lazy(
|
kespeech_test_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "kespeech" / "kespeech-asr_cuts_test.jsonl.gz"
|
self.fbank_dir / "kespeech" / "kespeech-asr_cuts_test.jsonl.gz"
|
||||||
)
|
)
|
||||||
|
kespeech_dev_phase1_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "kespeech" / "kespeech-asr_cuts_dev_phase1.jsonl.gz"
|
||||||
|
)
|
||||||
|
kespeech_dev_phase2_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "kespeech" / "kespeech-asr_cuts_dev_phase2.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
# WeNetSpeech
|
# WeNetSpeech
|
||||||
logging.info("Loading WeNetSpeech TEST set in lazy mode")
|
logging.info("Loading WeNetSpeech set in lazy mode")
|
||||||
wenetspeech_test_meeting_cuts = load_manifest_lazy(
|
wenetspeech_test_meeting_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "wenetspeech" / "cuts_TEST_MEETING.jsonl.gz"
|
self.fbank_dir / "wenetspeech" / "cuts_TEST_MEETING.jsonl.gz"
|
||||||
)
|
)
|
||||||
wenetspeech_test_net_cuts = load_manifest_lazy(
|
wenetspeech_test_net_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "wenetspeech" / "cuts_TEST_NET.jsonl.gz"
|
self.fbank_dir / "wenetspeech" / "cuts_TEST_NET.jsonl.gz"
|
||||||
)
|
)
|
||||||
|
wenetspeech_dev_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "wenetspeech" / "cuts_DEV.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"aidatatang": aidatatang_test_cuts,
|
"aidatatang_test": aidatatang_test_cuts,
|
||||||
# "alimeeting": alimeeting_test_cuts,
|
"aidatatang_dev": aidatatang_dev_cuts,
|
||||||
"aishell": aishell_test_cuts,
|
"alimeeting_test": alimeeting_test_cuts,
|
||||||
"aishell-2": aishell2_test_cuts,
|
"alimeeting_eval": alimeeting_eval_cuts,
|
||||||
|
"aishell_test": aishell_test_cuts,
|
||||||
|
"aishell_dev": aishell_dev_cuts,
|
||||||
|
"aishell-2_test": aishell2_test_cuts,
|
||||||
|
"aishell-2_dev": aishell2_dev_cuts,
|
||||||
"aishell-4": aishell4_test_cuts,
|
"aishell-4": aishell4_test_cuts,
|
||||||
"magicdata": magicdata_test_cuts,
|
"magicdata_test": magicdata_test_cuts,
|
||||||
"kespeech": kespeech_test_cuts,
|
"magicdata_dev": magicdata_dev_cuts,
|
||||||
"wenetspeech-meeting": wenetspeech_test_meeting_cuts,
|
"kespeech-asr_test": kespeech_test_cuts,
|
||||||
"wenetspeech-net": wenetspeech_test_net_cuts,
|
"kespeech-asr_dev_phase1": kespeech_dev_phase1_cuts,
|
||||||
|
"kespeech-asr_dev_phase2": kespeech_dev_phase2_cuts,
|
||||||
|
"wenetspeech-meeting_test": wenetspeech_test_meeting_cuts,
|
||||||
|
"wenetspeech-net_test": wenetspeech_test_net_cuts,
|
||||||
|
"wenetspeech_dev": wenetspeech_dev_cuts,
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user