Merge branch 'dev_multi_zh-hans' of https://github.com/JinZr/icefall into dev_multi_zh-hans

This commit is contained in:
JinZr 2023-07-19 11:53:50 +08:00
commit 660e431da1
2 changed files with 5 additions and 5 deletions

View File

@@ -47,7 +47,7 @@ def get_parser():
parser.add_argument(
"--training-subset",
type=str,
default="L",
default="train_phase1",
help="The training subset for computing fbank feature.",
)
@@ -93,7 +93,7 @@ def compute_fbank_kespeech_splits(args):
subset = args.training_subset
subset = str(subset)
num_splits = args.num_splits
output_dir = f"data/fbank/{subset}_split_{num_splits}"
output_dir = f"data/fbank/KeSpeech/{subset}_split_{num_splits}"
output_dir = Path(output_dir)
assert output_dir.exists(), f"{output_dir} does not exist!"

View File

@@ -47,13 +47,13 @@ def has_no_oov(
def preprocess_kespeech():
src_dir = Path("data/manifests/KeSpeech")
output_dir = Path("data/fbank")
output_dir = Path("data/fbank/KeSpeech")
output_dir.mkdir(exist_ok=True)
# Note: By default, we preprocess all sub-parts.
# You can delete those that you don't need.
# For instance, if you don't want to use the L subpart, just remove
# the line below containing "L"
# For instance, if you don't want to use the test subpart, just remove
# the line below containing "test"
dataset_parts = (
"dev_phase1",
"dev_phase2",