mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-10 01:24:19 +00:00
removed redundant wenetspeech M and S sets
This commit is contained in:
parent
dfd9bb4dc9
commit
49b0a6d952
@@ -44,8 +44,6 @@ class MultiDataset:
|
|||||||
- kespeech/kespeech-asr_cuts_train_phase1.jsonl.gz
|
- kespeech/kespeech-asr_cuts_train_phase1.jsonl.gz
|
||||||
- kespeech/kespeech-asr_cuts_train_phase2.jsonl.gz
|
- kespeech/kespeech-asr_cuts_train_phase2.jsonl.gz
|
||||||
- wenetspeech/cuts_L.jsonl.gz
|
- wenetspeech/cuts_L.jsonl.gz
|
||||||
- wenetspeech/cuts_M.jsonl.gz
|
|
||||||
- wenetspeech/cuts_S.jsonl.gz
|
|
||||||
"""
|
"""
|
||||||
self.fbank_dir = Path(fbank_dir)
|
self.fbank_dir = Path(fbank_dir)
|
||||||
|
|
||||||
@@ -84,9 +82,7 @@ class MultiDataset:
|
|||||||
|
|
||||||
# ST-CMDS
|
# ST-CMDS
|
||||||
logging.info("Loading ST-CMDS in lazy mode")
|
logging.info("Loading ST-CMDS in lazy mode")
|
||||||
stcmds_cuts = load_manifest_lazy(
|
stcmds_cuts = load_manifest_lazy(self.fbank_dir / "stcmds_cuts_train.jsonl.gz")
|
||||||
self.fbank_dir / "stcmds_cuts_train.jsonl.gz"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Primewords
|
# Primewords
|
||||||
logging.info("Loading Primewords in lazy mode")
|
logging.info("Loading Primewords in lazy mode")
|
||||||
@@ -117,12 +113,6 @@ class MultiDataset:
|
|||||||
wenetspeech_L_cuts = load_manifest_lazy(
|
wenetspeech_L_cuts = load_manifest_lazy(
|
||||||
self.fbank_dir / "wenetspeech" / "cuts_L.jsonl.gz"
|
self.fbank_dir / "wenetspeech" / "cuts_L.jsonl.gz"
|
||||||
)
|
)
|
||||||
wenetspeech_M_cuts = load_manifest_lazy(
|
|
||||||
self.fbank_dir / "wenetspeech" / "cuts_M.jsonl.gz"
|
|
||||||
)
|
|
||||||
wenetspeech_S_cuts = load_manifest_lazy(
|
|
||||||
self.fbank_dir / "wenetspeech" / "cuts_S.jsonl.gz"
|
|
||||||
)
|
|
||||||
|
|
||||||
# KeSpeech
|
# KeSpeech
|
||||||
logging.info("Loading KeSpeech in lazy mode")
|
logging.info("Loading KeSpeech in lazy mode")
|
||||||
@@ -146,8 +136,6 @@ class MultiDataset:
|
|||||||
aidatatang_200zh_cuts,
|
aidatatang_200zh_cuts,
|
||||||
alimeeting_cuts,
|
alimeeting_cuts,
|
||||||
wenetspeech_L_cuts,
|
wenetspeech_L_cuts,
|
||||||
wenetspeech_M_cuts,
|
|
||||||
wenetspeech_S_cuts,
|
|
||||||
kespeech_1_cuts,
|
kespeech_1_cuts,
|
||||||
kespeech_2_cuts,
|
kespeech_2_cuts,
|
||||||
weights=[
|
weights=[
|
||||||
@@ -163,8 +151,6 @@ class MultiDataset:
|
|||||||
len(aidatatang_200zh_cuts),
|
len(aidatatang_200zh_cuts),
|
||||||
len(alimeeting_cuts),
|
len(alimeeting_cuts),
|
||||||
len(wenetspeech_L_cuts),
|
len(wenetspeech_L_cuts),
|
||||||
len(wenetspeech_M_cuts),
|
|
||||||
len(wenetspeech_S_cuts),
|
|
||||||
len(kespeech_1_cuts),
|
len(kespeech_1_cuts),
|
||||||
len(kespeech_2_cuts),
|
len(kespeech_2_cuts),
|
||||||
],
|
],
|
||||||
@@ -175,7 +161,9 @@ class MultiDataset:
|
|||||||
|
|
||||||
# Aidatatang_200zh
|
# Aidatatang_200zh
|
||||||
logging.info("Loading Aidatatang_200zh DEV set in lazy mode")
|
logging.info("Loading Aidatatang_200zh DEV set in lazy mode")
|
||||||
aidatatang_dev_cuts = load_manifest_lazy(self.fbank_dir / "aidatatang_cuts_dev.jsonl.gz")
|
aidatatang_dev_cuts = load_manifest_lazy(
|
||||||
|
self.fbank_dir / "aidatatang_cuts_dev.jsonl.gz"
|
||||||
|
)
|
||||||
|
|
||||||
# AISHELL
|
# AISHELL
|
||||||
logging.info("Loading Aishell DEV set in lazy mode")
|
logging.info("Loading Aishell DEV set in lazy mode")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user