From 6fd14d202b8cd7ef3db1d377f3373fb883573c4e Mon Sep 17 00:00:00 2001 From: Yuekai Zhang Date: Mon, 19 Feb 2024 23:03:49 +0800 Subject: [PATCH] add kespeech whisper feats --- .../ASR/local/compute_fbank_aishell2.py | 2 +- egs/multi_zh-hans/ASR/prepare.sh | 58 ++++++++++++++++++- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/egs/aishell2/ASR/local/compute_fbank_aishell2.py b/egs/aishell2/ASR/local/compute_fbank_aishell2.py index f431966c2..dc2c4d153 100755 --- a/egs/aishell2/ASR/local/compute_fbank_aishell2.py +++ b/egs/aishell2/ASR/local/compute_fbank_aishell2.py @@ -45,7 +45,7 @@ torch.set_num_interop_threads(1) def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False): src_dir = Path("data/manifests") output_dir = Path("data/fbank") - num_jobs = min(15, os.cpu_count()) + num_jobs = min(8, os.cpu_count()) dataset_parts = ( "train", diff --git a/egs/multi_zh-hans/ASR/prepare.sh b/egs/multi_zh-hans/ASR/prepare.sh index c09b9c1de..003556469 100755 --- a/egs/multi_zh-hans/ASR/prepare.sh +++ b/egs/multi_zh-hans/ASR/prepare.sh @@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python set -eou pipefail -stage=-1 -stop_stage=100 +stage=120 +stop_stage=120 num_splits=100 dl_dir=$PWD/download @@ -303,6 +303,60 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then fi fi +whisper_mel_bins=80 +if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then + log "Stage 120: Prepare KeSpeech for whisper" + if [ ! -d $dl_dir/KeSpeech ]; then + log "Abort! Please download KeSpeech first." + log "KeSpeech download link: https://github.com/KeSpeech/KeSpeech" + exit 1 + fi + + if [ ! -f data/manifests/.kespeech.done ]; then + mkdir -p data/manifests + lhotse prepare kespeech -j 8 $dl_dir/KeSpeech data/manifests/kespeech + touch data/manifests/.kespeech.done + fi + + if [ ! -f data/fbank/.kespeech.done ]; then + mkdir -p data/fbank + + log "Preprocess KeSpeech manifest" + if [ ! -f data/fbank/.kespeech_preprocess_complete ]; then + python3 ./local/preprocess_kespeech.py + touch data/fbank/.kespeech_preprocess_complete + fi + + if [ -f data/fbank/.kespeech.train_phase1.split.${num_splits}.done ]; then + log "Spliting KeSpeech train_phase1" + lhotse split ${num_splits} \ + data/fbank/kespeech/kespeech-asr_cuts_train_phase1_raw.jsonl.gz \ + data/fbank/kespeech/train_phase1_split_${num_splits} + touch data/fbank/.kespeech.train_phase1.split.${num_splits}.done + fi + + if [ -f data/fbank/.kespeech.train_phase2.split.${num_splits}.done ]; then + log "Spliting KeSpeech train_phase2" + lhotse split ${num_splits} \ + data/fbank/kespeech/kespeech-asr_cuts_train_phase2_raw.jsonl.gz \ + data/fbank/kespeech/train_phase2_split_${num_splits} + touch data/fbank/.kespeech.train_phase2.split.${num_splits}.done + fi + + log "Compute KeSpeech fbank for train_phase1" + ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true + + log "Compute KeSpeech fbank for train_phase2" + ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true + + log "Compute KeSpeech fbank for test/dev" + ./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true + + touch data/fbank/.kespeech.done + fi +fi + + if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then log "Stage 13: BPE model training (note that we use transcripts of wenetspeech only for BPE training)" ./local/prepare_for_bpe_model.py --lang-dir ./data/lang_char --text ./data/lang_char/text