minor updates

2025-12-11 06:55:27 +00:00 · 2023-08-23 14:45:10 +08:00 · 2023-08-23 14:45:10 +08:00 · 7feaa6185d
commit 7feaa6185d
parent ba480b7b5f
6 changed files with 64 additions and 16 deletions
--- a/egs/swbd/ASR/conformer_ctc/decode.py
+++ b/egs/swbd/ASR/conformer_ctc/decode.py
@@ -796,15 +796,17 @@ def main():
    test_eval2000_cuts = switchboard.test_eval2000_cuts().trim_to_supervisions(
        keep_all_channels=True
    )
-    test_rt03_cuts = switchboard.test_rt03_cuts().trim_to_supervisions(
+    # test_rt03_cuts = switchboard.test_rt03_cuts().trim_to_supervisions(
-        keep_all_channels=True
+    #     keep_all_channels=True
-    )
+    # )
    test_eval2000_dl = switchboard.test_dataloaders(test_eval2000_cuts)
-    test_rt03_dl = switchboard.test_dataloaders(test_rt03_cuts)
+    # test_rt03_dl = switchboard.test_dataloaders(test_rt03_cuts)
-    test_sets = ["test-eval2000", "test-rt03"]
+    # test_sets = ["test-eval2000", "test-rt03"]
-    test_dl = [test_eval2000_dl, test_rt03_dl]
+    # test_dl = [test_eval2000_dl, test_rt03_dl]
+    test_sets = ["test-eval2000"]
+    test_dl = [test_eval2000_dl]
    for test_set, test_dl in zip(test_sets, test_dl):
        results_dict = decode_dataset(
--- a/egs/swbd/ASR/local/compute_fbank_eval2000.py
+++ b/egs/swbd/ASR/local/compute_fbank_eval2000.py
@@ -97,7 +97,7 @@ def compute_fbank_switchboard(
    prefix = dir_name
    suffix = "jsonl.gz"
    manifests = {
-        "eval2000": "data/manifests/eval2000/eval2000_cuts_all_trimmed.jsonl.gz",
+        "eval2000": "data/manifests/eval2000/eval2000_cuts_all.jsonl.gz",
    }
    assert manifests is not None
@@ -111,7 +111,12 @@ def compute_fbank_switchboard(
            logging.info(f"{prefix} already exists - skipping.")
            return
        logging.info(f"Processing {prefix}")
-        cut_set = CutSet.from_file(manifests[prefix]).resample(16000)
+        cut_set = (
+            CutSet.from_file(manifests[prefix])
+            .resample(16000)
+            .to_eager()
+            .filter(lambda c: c.duration > 0.5)
+        )
        cut_set = cut_set.compute_and_store_features(
            extractor=extractor,
@@ -121,6 +126,7 @@ def compute_fbank_switchboard(
            executor=ex,
            storage_type=LilcomChunkyWriter,
        )
        cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
        cut_set.to_file(output_dir / cuts_filename)
--- a/egs/swbd/ASR/local/display_manifest_statistics.py
+++ b/egs/swbd/ASR/local/display_manifest_statistics.py
@@ -30,8 +30,8 @@ from lhotse import load_manifest_lazy
 def main():
    #  path = "./data/fbank/swbd_cuts_rt03.jsonl.gz"
-    #  path = "./data/fbank/swbd_cuts_eval2000.jsonl.gz"
+    path = "./data/fbank/eval2000/eval2000_cuts_all.jsonl.gz"
-    path = "./data/fbank/swbd_cuts_all.jsonl.gz"
+    # path = "./data/fbank/swbd_cuts_all.jsonl.gz"
    cuts = load_manifest_lazy(path)
    cuts.describe()
@@ -41,7 +41,7 @@ if __name__ == "__main__":
    main()
 """
-Cut statistics:
+Training Cut statistics:
 ╒═══════════════════════════╤═══════════╕
 │ Cuts count:               │ 167244    │
 ├───────────────────────────┼───────────┤
@@ -81,4 +81,45 @@ Speech duration statistics:
 ├──────────────────────────────┼───────────┼──────────────────────┤
 │ Total silence duration       │ 00:00:00  │ 0.00% of recording   │
 ╘══════════════════════════════╧═══════════╧══════════════════════╛
 Eval2000 Cut statistics:
 ╒═══════════════════════════╤══════════╕
 │ Cuts count:               │ 2709     │
 ├───────────────────────────┼──────────┤
 │ Total duration (hh:mm:ss) │ 01:39:19 │
 ├───────────────────────────┼──────────┤
 │ mean                      │ 2.2      │
 ├───────────────────────────┼──────────┤
 │ std                       │ 1.8      │
 ├───────────────────────────┼──────────┤
 │ min                       │ 0.1      │
 ├───────────────────────────┼──────────┤
 │ 25%                       │ 0.7      │
 ├───────────────────────────┼──────────┤
 │ 50%                       │ 1.7      │
 ├───────────────────────────┼──────────┤
 │ 75%                       │ 3.1      │
 ├───────────────────────────┼──────────┤
 │ 99%                       │ 8.0      │
 ├───────────────────────────┼──────────┤
 │ 99.5%                     │ 8.3      │
 ├───────────────────────────┼──────────┤
 │ 99.9%                     │ 11.3     │
 ├───────────────────────────┼──────────┤
 │ max                       │ 14.1     │
 ├───────────────────────────┼──────────┤
 │ Recordings available:     │ 2709     │
 ├───────────────────────────┼──────────┤
 │ Features available:       │ 0        │
 ├───────────────────────────┼──────────┤
 │ Supervisions available:   │ 2709     │
 ╘═══════════════════════════╧══════════╛
 Speech duration statistics:
 ╒══════════════════════════════╤══════════╤══════════════════════╕
 │ Total speech duration        │ 01:39:19 │ 100.00% of recording │
 ├──────────────────────────────┼──────────┼──────────────────────┤
 │ Total speaking time duration │ 01:39:19 │ 100.00% of recording │
 ├──────────────────────────────┼──────────┼──────────────────────┤
 │ Total silence duration       │ 00:00:00 │ 0.00% of recording   │
 ╘══════════════════════════════╧══════════╧══════════════════════╛
 """
--- a/egs/swbd/ASR/local/prepare_lang_bpe.py
+++ b/egs/swbd/ASR/local/prepare_lang_bpe.py
@@ -216,9 +216,6 @@ def main():
        "#0",
        "<s>",
        "</s>",
        "[VOCALIZED-NOISE]",
        "[NOISE]",
        "[LAUGHTER]",
    ]
    for w in excluded:
--- a/egs/swbd/ASR/local/train_bpe_model.py
+++ b/egs/swbd/ASR/local/train_bpe_model.py
@@ -75,6 +75,8 @@ def main():
    # If you change it, you should also change other
    # places that are using it.
    user_defined_symbols += ["[LAUGHTER]", "[NOISE]", "[VOCALIZED-NOISE]"]
    model_file = Path(model_prefix + ".model")
    if not model_file.is_file():
        spm.SentencePieceTrainer.train(
--- a/egs/swbd/ASR/prepare.sh
+++ b/egs/swbd/ASR/prepare.sh
@@ -45,7 +45,7 @@ fisher_dir="/export/corpora3/LDC/LDC2004T19"
 vocab_sizes=(
    # 5000
    # 2000
-    # 1000
+    1000
    500
 )
@@ -197,7 +197,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
    # [noise] nsn
    # !sil sil
    # <unk> spn
-    cat data/local/dict_nosp/lexicon.txt |
+    cat data/local/dict_nosp/lexicon.txt | sed 's/-//g' | sed 's/\[vocalizednoise\]/\[vocalized-noise\]/g' |
        sort | uniq >$lang_dir/lexicon_lower.txt
    cat $lang_dir/lexicon_lower.txt | tr a-z A-Z > $lang_dir/lexicon.txt