minor updates

JinZr 2023-08-23 14:45:10 +08:00
parent ba480b7b5f
commit 7feaa6185d
6 changed files with 64 additions and 16 deletions

View File

@@ -796,15 +796,17 @@ def main():
     test_eval2000_cuts = switchboard.test_eval2000_cuts().trim_to_supervisions(
         keep_all_channels=True
     )
-    test_rt03_cuts = switchboard.test_rt03_cuts().trim_to_supervisions(
-        keep_all_channels=True
-    )
+    # test_rt03_cuts = switchboard.test_rt03_cuts().trim_to_supervisions(
+    #     keep_all_channels=True
+    # )
     test_eval2000_dl = switchboard.test_dataloaders(test_eval2000_cuts)
-    test_rt03_dl = switchboard.test_dataloaders(test_rt03_cuts)
+    # test_rt03_dl = switchboard.test_dataloaders(test_rt03_cuts)
-    test_sets = ["test-eval2000", "test-rt03"]
-    test_dl = [test_eval2000_dl, test_rt03_dl]
+    # test_sets = ["test-eval2000", "test-rt03"]
+    # test_dl = [test_eval2000_dl, test_rt03_dl]
+    test_sets = ["test-eval2000"]
+    test_dl = [test_eval2000_dl]
     for test_set, test_dl in zip(test_sets, test_dl):
         results_dict = decode_dataset(

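Note: the hunk above removes RT03 from decoding, so only Eval2000 is scored. A standalone sketch of the underlying pattern, parallel lists of test-set names and dataloaders consumed through zip(); the toggle and the placeholder dataloaders are made up for illustration and are not part of the recipe:

def decode_dataset(dl, name):  # stand-in for the recipe's decode_dataset()
    return {name: f"decoded {len(dl)} batches"}

test_eval2000_dl = [object()] * 3  # placeholder dataloaders
test_rt03_dl = [object()] * 2
use_rt03 = False  # this commit effectively hard-codes False by commenting RT03 out

test_sets = ["test-eval2000"] + (["test-rt03"] if use_rt03 else [])
test_dls = [test_eval2000_dl] + ([test_rt03_dl] if use_rt03 else [])

for test_set, dl in zip(test_sets, test_dls):
    print(decode_dataset(dl, test_set))
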
View File

@@ -97,7 +97,7 @@ def compute_fbank_switchboard(
     prefix = dir_name
     suffix = "jsonl.gz"
     manifests = {
-        "eval2000": "data/manifests/eval2000/eval2000_cuts_all_trimmed.jsonl.gz",
+        "eval2000": "data/manifests/eval2000/eval2000_cuts_all.jsonl.gz",
     }
     assert manifests is not None
@@ -111,7 +111,12 @@ def compute_fbank_switchboard(
             logging.info(f"{prefix} already exists - skipping.")
             return
         logging.info(f"Processing {prefix}")
-        cut_set = CutSet.from_file(manifests[prefix]).resample(16000)
+        cut_set = (
+            CutSet.from_file(manifests[prefix])
+            .resample(16000)
+            .to_eager()
+            .filter(lambda c: c.duration > 0.5)
+        )
         cut_set = cut_set.compute_and_store_features(
             extractor=extractor,
@@ -121,6 +126,7 @@
             executor=ex,
             storage_type=LilcomChunkyWriter,
         )
+        cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
         cut_set.to_file(output_dir / cuts_filename)

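Taken together, the two hunks above change the Eval2000 feature pipeline: load the untrimmed manifest, resample to 16 kHz, materialize it, drop cuts shorter than 0.5 s, and only trim to supervisions after feature extraction. A minimal sketch of that chain, assuming the eval2000 manifest from the earlier preparation stages exists; the 80-bin fbank config and num_jobs value are assumptions, not taken from this hunk:

from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter

cut_set = (
    CutSet.from_file("data/manifests/eval2000/eval2000_cuts_all.jsonl.gz")
    .resample(16000)
    .to_eager()                          # materialize the lazy manifest before filtering
    .filter(lambda c: c.duration > 0.5)  # drop cuts shorter than 0.5 s
)
cut_set = cut_set.compute_and_store_features(
    extractor=Fbank(FbankConfig(num_mel_bins=80)),  # assumed fbank setup
    storage_path="data/fbank/eval2000_feats",
    num_jobs=4,
    storage_type=LilcomChunkyWriter,
)
# Trim to supervision boundaries and discard overlapping segments.
cut_set = cut_set.trim_to_supervisions(keep_overlapping=False)
cut_set.to_file("data/fbank/eval2000_cuts_all.jsonl.gz")
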
View File

@@ -30,8 +30,8 @@ from lhotse import load_manifest_lazy
 def main():
     # path = "./data/fbank/swbd_cuts_rt03.jsonl.gz"
     # path = "./data/fbank/swbd_cuts_eval2000.jsonl.gz"
-    path = "./data/fbank/swbd_cuts_all.jsonl.gz"
+    path = "./data/fbank/eval2000/eval2000_cuts_all.jsonl.gz"
+    # path = "./data/fbank/swbd_cuts_all.jsonl.gz"
     cuts = load_manifest_lazy(path)
     cuts.describe()
@@ -41,7 +41,7 @@ if __name__ == "__main__":
     main()
 """
-Cut statistics:
+Training Cut statistics:
 Cuts count: 167244
@@ -81,4 +81,45 @@ Speech duration statistics:
 Total silence duration 00:00:00 0.00% of recording
+Eval2000 Cut statistics:
+Cuts count: 2709
+Total duration (hh:mm:ss) 01:39:19
+mean 2.2
+std 1.8
+min 0.1
+25% 0.7
+50% 1.7
+75% 3.1
+99% 8.0
+99.5% 8.3
+99.9% 11.3
+max 14.1
+Recordings available: 2709
+Features available: 0
+Supervisions available: 2709
+Speech duration statistics:
+Total speech duration 01:39:19 100.00% of recording
+Total speaking time duration 01:39:19 100.00% of recording
+Total silence duration 00:00:00 0.00% of recording
 """

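The statistics quoted above come from lhotse's CutSet.describe(). A small sketch that reproduces them and cross-checks the headline numbers by hand, using the path set in the diff:

from lhotse import load_manifest_lazy

path = "./data/fbank/eval2000/eval2000_cuts_all.jsonl.gz"
cuts = load_manifest_lazy(path)
cuts.describe()

# Cross-check the cut count and total duration (durations are in seconds).
n, total = 0, 0.0
for cut in load_manifest_lazy(path):
    n += 1
    total += cut.duration
print(f"{n} cuts, {total / 3600:.2f} hours")
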
View File

@@ -216,9 +216,6 @@ def main():
         "#0",
         "<s>",
         "</s>",
-        "[VOCALIZED-NOISE]",
-        "[NOISE]",
-        "[LAUGHTER]",
     ]
     for w in excluded:

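The hunk above drops the three noise tags from the `excluded` list, so they survive as regular words when the lang directory is built (they become user-defined BPE symbols in the next file). A toy sketch of the exclusion loop; the word list is made up, only the tail of `excluded` is visible in the hunk, and the loop body is an assumption about what follows `for w in excluded:`:

words = ["A", "ABOUT", "[LAUGHTER]", "[NOISE]", "[VOCALIZED-NOISE]", "#0", "<s>", "</s>"]
excluded = ["#0", "<s>", "</s>"]  # per the hunk, the noise tags are no longer excluded

for w in excluded:
    if w in words:
        words.remove(w)

print(words)  # ['A', 'ABOUT', '[LAUGHTER]', '[NOISE]', '[VOCALIZED-NOISE]']
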
View File

@@ -75,6 +75,8 @@ def main():
     # If you change it, you should also change other
     # places that are using it.
+    user_defined_symbols += ["[LAUGHTER]", "[NOISE]", "[VOCALIZED-NOISE]"]
     model_file = Path(model_prefix + ".model")
     if not model_file.is_file():
         spm.SentencePieceTrainer.train(

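The added line registers the noise tags as user-defined symbols so SentencePiece keeps them as single tokens instead of splitting them into subword pieces. A sketch of the training call in the style of the usual icefall train_bpe_model.py; the transcript path, vocab size, and the other training options are assumptions, only the user_defined_symbols line comes from this hunk:

import sentencepiece as spm

user_defined_symbols = ["<blk>", "<sos/eos>"]
user_defined_symbols += ["[LAUGHTER]", "[NOISE]", "[VOCALIZED-NOISE]"]

spm.SentencePieceTrainer.train(
    input="data/lang_bpe_500/transcript_words.txt",  # assumed transcript path
    vocab_size=500,
    model_type="unigram",
    model_prefix="data/lang_bpe_500/unigram_500",
    character_coverage=1.0,
    user_defined_symbols=user_defined_symbols,
    unk_id=2,   # <blk>=0 and <sos/eos>=1 occupy the first two ids
    bos_id=-1,
    eos_id=-1,
)
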
View File

@@ -45,7 +45,7 @@ fisher_dir="/export/corpora3/LDC/LDC2004T19"
 vocab_sizes=(
   # 5000
   # 2000
-  # 1000
+  1000
   500
 )
@@ -197,7 +197,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
   # [noise] nsn
   # !sil sil
   # <unk> spn
-  cat data/local/dict_nosp/lexicon.txt |
+  cat data/local/dict_nosp/lexicon.txt | sed 's/-//g' | sed 's/\[vocalizednoise\]/\[vocalized-noise\]/g' |
     sort | uniq >$lang_dir/lexicon_lower.txt
   cat $lang_dir/lexicon_lower.txt | tr a-z A-Z > $lang_dir/lexicon.txt
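The new sed commands strip hyphens from every lexicon line and then restore the hyphen inside the [vocalized-noise] tag that the first substitution breaks. A rough Python equivalent of the added normalization with the sort | uniq step folded in; the output filename is a stand-in for $lang_dir/lexicon_lower.txt and the input file must exist from the earlier stages:

with open("data/local/dict_nosp/lexicon.txt") as f:
    lines = set()
    for line in f:
        line = line.replace("-", "")  # sed 's/-//g'
        # restore the tag mangled by the hyphen removal
        line = line.replace("[vocalizednoise]", "[vocalized-noise]")
        lines.add(line)

with open("lexicon_lower.txt", "w") as f:  # stand-in for $lang_dir/lexicon_lower.txt
    f.writelines(sorted(lines))  # sort | uniq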