Merge branch 'dev_multi_zh-hans' of https://github.com/JinZr/icefall into dev_multi_zh-hans

This commit is contained in:
JinZr 2023-07-19 11:53:50 +08:00
commit 660e431da1
2 changed files with 5 additions and 5 deletions

View File

@@ -47,7 +47,7 @@ def get_parser():
parser.add_argument( parser.add_argument(
"--training-subset", "--training-subset",
type=str, type=str,
default="L", default="train_phase1",
help="The training subset for computing fbank feature.", help="The training subset for computing fbank feature.",
) )
@@ -93,7 +93,7 @@ def compute_fbank_kespeech_splits(args):
subset = args.training_subset subset = args.training_subset
subset = str(subset) subset = str(subset)
num_splits = args.num_splits num_splits = args.num_splits
output_dir = f"data/fbank/{subset}_split_{num_splits}" output_dir = f"data/fbank/KeSpeech/{subset}_split_{num_splits}"
output_dir = Path(output_dir) output_dir = Path(output_dir)
assert output_dir.exists(), f"{output_dir} does not exist!" assert output_dir.exists(), f"{output_dir} does not exist!"

View File

@@ -47,13 +47,13 @@ def has_no_oov(
def preprocess_kespeech(): def preprocess_kespeech():
src_dir = Path("data/manifests/KeSpeech") src_dir = Path("data/manifests/KeSpeech")
output_dir = Path("data/fbank") output_dir = Path("data/fbank/KeSpeech")
output_dir.mkdir(exist_ok=True) output_dir.mkdir(exist_ok=True)
# Note: By default, we preprocess all sub-parts. # Note: By default, we preprocess all sub-parts.
# You can delete those that you don't need. # You can delete those that you don't need.
# For instance, if you don't want to use the L subpart, just remove # For instance, if you don't want to use the test subpart, just remove
# the line below containing "L" # the line below containing "test"
dataset_parts = ( dataset_parts = (
"dev_phase1", "dev_phase1",
"dev_phase2", "dev_phase2",