mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-11 06:55:27 +00:00
minor updates
This commit is contained in:
parent
ba480b7b5f
commit
7feaa6185d
@ -796,15 +796,17 @@ def main():
|
|||||||
test_eval2000_cuts = switchboard.test_eval2000_cuts().trim_to_supervisions(
|
test_eval2000_cuts = switchboard.test_eval2000_cuts().trim_to_supervisions(
|
||||||
keep_all_channels=True
|
keep_all_channels=True
|
||||||
)
|
)
|
||||||
test_rt03_cuts = switchboard.test_rt03_cuts().trim_to_supervisions(
|
# test_rt03_cuts = switchboard.test_rt03_cuts().trim_to_supervisions(
|
||||||
keep_all_channels=True
|
# keep_all_channels=True
|
||||||
)
|
# )
|
||||||
|
|
||||||
test_eval2000_dl = switchboard.test_dataloaders(test_eval2000_cuts)
|
test_eval2000_dl = switchboard.test_dataloaders(test_eval2000_cuts)
|
||||||
test_rt03_dl = switchboard.test_dataloaders(test_rt03_cuts)
|
# test_rt03_dl = switchboard.test_dataloaders(test_rt03_cuts)
|
||||||
|
|
||||||
test_sets = ["test-eval2000", "test-rt03"]
|
# test_sets = ["test-eval2000", "test-rt03"]
|
||||||
test_dl = [test_eval2000_dl, test_rt03_dl]
|
# test_dl = [test_eval2000_dl, test_rt03_dl]
|
||||||
|
test_sets = ["test-eval2000"]
|
||||||
|
test_dl = [test_eval2000_dl]
|
||||||
|
|
||||||
for test_set, test_dl in zip(test_sets, test_dl):
|
for test_set, test_dl in zip(test_sets, test_dl):
|
||||||
results_dict = decode_dataset(
|
results_dict = decode_dataset(
|
||||||
|
|||||||
@ -97,7 +97,7 @@ def compute_fbank_switchboard(
|
|||||||
prefix = dir_name
|
prefix = dir_name
|
||||||
suffix = "jsonl.gz"
|
suffix = "jsonl.gz"
|
||||||
manifests = {
|
manifests = {
|
||||||
"eval2000": "data/manifests/eval2000/eval2000_cuts_all_trimmed.jsonl.gz",
|
"eval2000": "data/manifests/eval2000/eval2000_cuts_all.jsonl.gz",
|
||||||
}
|
}
|
||||||
assert manifests is not None
|
assert manifests is not None
|
||||||
|
|
||||||
@ -111,7 +111,12 @@ def compute_fbank_switchboard(
|
|||||||
logging.info(f"{prefix} already exists - skipping.")
|
logging.info(f"{prefix} already exists - skipping.")
|
||||||
return
|
return
|
||||||
logging.info(f"Processing {prefix}")
|
logging.info(f"Processing {prefix}")
|
||||||
cut_set = CutSet.from_file(manifests[prefix]).resample(16000)
|
cut_set = (
|
||||||
|
CutSet.from_file(manifests[prefix])
|
||||||
|
.resample(16000)
|
||||||
|
.to_eager()
|
||||||
|
.filter(lambda c: c.duration > 0.5)
|
||||||
|
)
|
||||||
|
|
||||||
cut_set = cut_set.compute_and_store_features(
|
cut_set = cut_set.compute_and_store_features(
|
||||||
extractor=extractor,
|
extractor=extractor,
|
||||||
@ -121,6 +126,7 @@ def compute_fbank_switchboard(
|
|||||||
executor=ex,
|
executor=ex,
|
||||||
storage_type=LilcomChunkyWriter,
|
storage_type=LilcomChunkyWriter,
|
||||||
)
|
)
|
||||||
|
cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
|
||||||
cut_set.to_file(output_dir / cuts_filename)
|
cut_set.to_file(output_dir / cuts_filename)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -30,8 +30,8 @@ from lhotse import load_manifest_lazy
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
# path = "./data/fbank/swbd_cuts_rt03.jsonl.gz"
|
# path = "./data/fbank/swbd_cuts_rt03.jsonl.gz"
|
||||||
# path = "./data/fbank/swbd_cuts_eval2000.jsonl.gz"
|
path = "./data/fbank/eval2000/eval2000_cuts_all.jsonl.gz"
|
||||||
path = "./data/fbank/swbd_cuts_all.jsonl.gz"
|
# path = "./data/fbank/swbd_cuts_all.jsonl.gz"
|
||||||
|
|
||||||
cuts = load_manifest_lazy(path)
|
cuts = load_manifest_lazy(path)
|
||||||
cuts.describe()
|
cuts.describe()
|
||||||
@ -41,7 +41,7 @@ if __name__ == "__main__":
|
|||||||
main()
|
main()
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Cut statistics:
|
Training Cut statistics:
|
||||||
╒═══════════════════════════╤═══════════╕
|
╒═══════════════════════════╤═══════════╕
|
||||||
│ Cuts count: │ 167244 │
|
│ Cuts count: │ 167244 │
|
||||||
├───────────────────────────┼───────────┤
|
├───────────────────────────┼───────────┤
|
||||||
@ -81,4 +81,45 @@ Speech duration statistics:
|
|||||||
├──────────────────────────────┼───────────┼──────────────────────┤
|
├──────────────────────────────┼───────────┼──────────────────────┤
|
||||||
│ Total silence duration │ 00:00:00 │ 0.00% of recording │
|
│ Total silence duration │ 00:00:00 │ 0.00% of recording │
|
||||||
╘══════════════════════════════╧═══════════╧══════════════════════╛
|
╘══════════════════════════════╧═══════════╧══════════════════════╛
|
||||||
|
|
||||||
|
Eval2000 Cut statistics:
|
||||||
|
╒═══════════════════════════╤══════════╕
|
||||||
|
│ Cuts count: │ 2709 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ Total duration (hh:mm:ss) │ 01:39:19 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ mean │ 2.2 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ std │ 1.8 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ min │ 0.1 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ 25% │ 0.7 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ 50% │ 1.7 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ 75% │ 3.1 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ 99% │ 8.0 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ 99.5% │ 8.3 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ 99.9% │ 11.3 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ max │ 14.1 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ Recordings available: │ 2709 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ Features available: │ 0 │
|
||||||
|
├───────────────────────────┼──────────┤
|
||||||
|
│ Supervisions available: │ 2709 │
|
||||||
|
╘═══════════════════════════╧══════════╛
|
||||||
|
Speech duration statistics:
|
||||||
|
╒══════════════════════════════╤══════════╤══════════════════════╕
|
||||||
|
│ Total speech duration │ 01:39:19 │ 100.00% of recording │
|
||||||
|
├──────────────────────────────┼──────────┼──────────────────────┤
|
||||||
|
│ Total speaking time duration │ 01:39:19 │ 100.00% of recording │
|
||||||
|
├──────────────────────────────┼──────────┼──────────────────────┤
|
||||||
|
│ Total silence duration │ 00:00:00 │ 0.00% of recording │
|
||||||
|
╘══════════════════════════════╧══════════╧══════════════════════╛
|
||||||
"""
|
"""
|
||||||
|
|||||||
@ -216,9 +216,6 @@ def main():
|
|||||||
"#0",
|
"#0",
|
||||||
"<s>",
|
"<s>",
|
||||||
"</s>",
|
"</s>",
|
||||||
"[VOCALIZED-NOISE]",
|
|
||||||
"[NOISE]",
|
|
||||||
"[LAUGHTER]",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for w in excluded:
|
for w in excluded:
|
||||||
|
|||||||
@ -75,6 +75,8 @@ def main():
|
|||||||
# If you change it, you should also change other
|
# If you change it, you should also change other
|
||||||
# places that are using it.
|
# places that are using it.
|
||||||
|
|
||||||
|
user_defined_symbols += ["[LAUGHTER]", "[NOISE]", "[VOCALIZED-NOISE]"]
|
||||||
|
|
||||||
model_file = Path(model_prefix + ".model")
|
model_file = Path(model_prefix + ".model")
|
||||||
if not model_file.is_file():
|
if not model_file.is_file():
|
||||||
spm.SentencePieceTrainer.train(
|
spm.SentencePieceTrainer.train(
|
||||||
|
|||||||
@ -45,7 +45,7 @@ fisher_dir="/export/corpora3/LDC/LDC2004T19"
|
|||||||
vocab_sizes=(
|
vocab_sizes=(
|
||||||
# 5000
|
# 5000
|
||||||
# 2000
|
# 2000
|
||||||
# 1000
|
1000
|
||||||
500
|
500
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -197,7 +197,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
|
|||||||
# [noise] nsn
|
# [noise] nsn
|
||||||
# !sil sil
|
# !sil sil
|
||||||
# <unk> spn
|
# <unk> spn
|
||||||
cat data/local/dict_nosp/lexicon.txt |
|
cat data/local/dict_nosp/lexicon.txt | sed 's/-//g' | sed 's/\[vocalizednoise\]/\[vocalized-noise\]/g' |
|
||||||
sort | uniq >$lang_dir/lexicon_lower.txt
|
sort | uniq >$lang_dir/lexicon_lower.txt
|
||||||
|
|
||||||
cat $lang_dir/lexicon_lower.txt | tr a-z A-Z > $lang_dir/lexicon.txt
|
cat $lang_dir/lexicon_lower.txt | tr a-z A-Z > $lang_dir/lexicon.txt
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user