Merge branch 'dev_swbd' of https://github.com/JinZr/icefall into dev_swbd

This commit is contained in:
JinZr 2023-08-01 19:12:10 +08:00
commit e38afc407d
2 changed files with 12 additions and 14 deletions

View File

@@ -74,12 +74,13 @@ def get_args():
def compute_fbank_switchboard( def compute_fbank_switchboard(
dir_name: str,
bpe_model: Optional[str] = None, bpe_model: Optional[str] = None,
dataset: Optional[str] = None, dataset: Optional[str] = None,
perturb_speed: Optional[bool] = True, perturb_speed: Optional[bool] = True,
): ):
src_dir = Path("data/manifests") src_dir = Path(f"data/manifests/{dir_name}")
output_dir = Path("data/fbank") output_dir = Path(f"data/fbank/{dir_name}")
num_jobs = min(15, os.cpu_count()) num_jobs = min(15, os.cpu_count())
num_mel_bins = 80 num_mel_bins = 80
@@ -89,11 +90,11 @@ def compute_fbank_switchboard(
sp.load(bpe_model) sp.load(bpe_model)
if dataset is None: if dataset is None:
dataset_parts = ("all", "eval2000", "rt03") dataset_parts = ("all")
else: else:
dataset_parts = dataset.split(" ", -1) dataset_parts = dataset.split(" ", -1)
prefix = "swbd" prefix = dir_name
suffix = "jsonl.gz" suffix = "jsonl.gz"
manifests = read_manifests_if_cached( manifests = read_manifests_if_cached(
dataset_parts=dataset_parts, dataset_parts=dataset_parts,
@@ -151,7 +152,9 @@ if __name__ == "__main__":
logging.basicConfig(format=formatter, level=logging.INFO) logging.basicConfig(format=formatter, level=logging.INFO)
args = get_args() args = get_args()
logging.info(vars(args)) logging.info(vars(args))
for dir_name in ["swbd"]:
compute_fbank_switchboard( compute_fbank_switchboard(
dir_name=dir_name,
bpe_model=args.bpe_model, bpe_model=args.bpe_model,
dataset=args.dataset, dataset=args.dataset,
perturb_speed=args.perturb_speed, perturb_speed=args.perturb_speed,

View File

@@ -74,11 +74,6 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
data/manifests/swbd/swbd_supervisions_norm.jsonl data/manifests/swbd/swbd_supervisions_norm.jsonl
cp data/manifests/swbd/swbd_recordings.jsonl data/manifests/recordings_swbd.jsonl cp data/manifests/swbd/swbd_recordings.jsonl data/manifests/recordings_swbd.jsonl
./local/swbd1_prepare_dict.sh
./local/swbd1_data_prep.sh $swbd1_dir
lhotse kaldi import data/local/train 8000 data/manifests_train
mv data/manifests_train/recordings.jsonl.gz data/manifests_train/swbd_recordings_all.jsonl.gz
mv data/manifests_train/supervisions.jsonl.gz data/manifests_train/swbd_supervisions_all.jsonl.gz
lhotse prepare $eval2000_dir data/manifests_eval2000 lhotse prepare $eval2000_dir data/manifests_eval2000
./local/normalize_eval2000.py \ ./local/normalize_eval2000.py \