Merge branch 'dev_multi_zh-hans' of https://github.com/JinZr/icefall into dev_multi_zh-hans

2023-07-19 11:53:50 +08:00 · 2023-07-19 11:53:50 +08:00 · 660e431da1
commit 660e431da1
parent c1aa9556d2 c061307c90
2 changed files with 5 additions and 5 deletions
--- a/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py
+++ b/egs/multi_zh-hans/ASR/local/compute_fbank_kespeech_splits.py
@@ -47,7 +47,7 @@ def get_parser():
    parser.add_argument(
        "--training-subset",
        type=str,
-        default="L",
+        default="train_phase1",
        help="The training subset for computing fbank feature.",
    )

@@ -93,7 +93,7 @@ def compute_fbank_kespeech_splits(args):
    subset = args.training_subset
    subset = str(subset)
    num_splits = args.num_splits
-    output_dir = f"data/fbank/{subset}_split_{num_splits}"
+    output_dir = f"data/fbank/KeSpeech/{subset}_split_{num_splits}"
    output_dir = Path(output_dir)
    assert output_dir.exists(), f"{output_dir} does not exist!"

--- a/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py
+++ b/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py
@@ -47,13 +47,13 @@ def has_no_oov(

 def preprocess_kespeech():
    src_dir = Path("data/manifests/KeSpeech")
-    output_dir = Path("data/fbank")
+    output_dir = Path("data/fbank/KeSpeech")
    output_dir.mkdir(exist_ok=True)

    # Note: By default, we preprocess all sub-parts.
    # You can delete those that you don't need.
-    # For instance, if you don't want to use the L subpart, just remove
-    # the line below containing "L"
+    # For instance, if you don't want to use the test subpart, just remove
+    # the line below containing "test"
    dataset_parts = (
        "dev_phase1",
        "dev_phase2",