add kespeech whisper feats

This commit is contained in:
Yuekai Zhang 2024-02-19 23:03:49 +08:00
parent ff75cf6cb3
commit 6fd14d202b
2 changed files with 57 additions and 3 deletions

View File

@ -45,7 +45,7 @@ torch.set_num_interop_threads(1)
def compute_fbank_aishell2(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
num_jobs = min(8, os.cpu_count())
dataset_parts = (
"train",

View File

@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail
stage=-1
stop_stage=100
stage=120
stop_stage=120
num_splits=100
dl_dir=$PWD/download
@ -303,6 +303,60 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
fi
fi
# Number of mel bins forwarded (via --num-mel-bins) to the KeSpeech fbank
# scripts invoked in Stage 120 below.
whisper_mel_bins=80

# Stage 120: build KeSpeech manifests and compute whisper-style fbank features.
# Each step is guarded by a ".done" marker file so an interrupted run can be
# resumed without redoing work that already finished.
if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
  log "Stage 120: Prepare KeSpeech for whisper"

  # The corpus is not fetched by this script; it must already be present.
  if [ ! -d "$dl_dir/KeSpeech" ]; then
    log "Abort! Please download KeSpeech first."
    log "KeSpeech download link: https://github.com/KeSpeech/KeSpeech"
    exit 1
  fi

  # Step 1: create the lhotse manifests (once).
  if [ ! -f data/manifests/.kespeech.done ]; then
    mkdir -p data/manifests
    lhotse prepare kespeech -j 8 "$dl_dir/KeSpeech" data/manifests/kespeech
    touch data/manifests/.kespeech.done
  fi

  # Step 2: preprocess, split and extract features (once).
  if [ ! -f data/fbank/.kespeech.done ]; then
    mkdir -p data/fbank

    log "Preprocess KeSpeech manifest"
    if [ ! -f data/fbank/.kespeech_preprocess_complete ]; then
      python3 ./local/preprocess_kespeech.py
      touch data/fbank/.kespeech_preprocess_complete
    fi

    # Split only when the marker is ABSENT. The marker is created right after
    # a successful split, so testing for its presence (as the code previously
    # did) meant the split could never run on a fresh checkout.
    if [ ! -f data/fbank/.kespeech.train_phase1.split.${num_splits}.done ]; then
      log "Spliting KeSpeech train_phase1"
      lhotse split ${num_splits} \
        data/fbank/kespeech/kespeech-asr_cuts_train_phase1_raw.jsonl.gz \
        data/fbank/kespeech/train_phase1_split_${num_splits}
      touch data/fbank/.kespeech.train_phase1.split.${num_splits}.done
    fi

    # Same guard as above, for the second training subset.
    if [ ! -f data/fbank/.kespeech.train_phase2.split.${num_splits}.done ]; then
      log "Spliting KeSpeech train_phase2"
      lhotse split ${num_splits} \
        data/fbank/kespeech/kespeech-asr_cuts_train_phase2_raw.jsonl.gz \
        data/fbank/kespeech/train_phase2_split_${num_splits}
      touch data/fbank/.kespeech.train_phase2.split.${num_splits}.done
    fi

    log "Compute KeSpeech fbank for train_phase1"
    ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true

    log "Compute KeSpeech fbank for train_phase2"
    ./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true

    log "Compute KeSpeech fbank for test/dev"
    ./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true

    # Mark the whole feature-extraction step as finished.
    touch data/fbank/.kespeech.done
  fi
fi
if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
log "Stage 13: BPE model training (note that we use transcripts of wenetspeech only for BPE training)"
./local/prepare_for_bpe_model.py --lang-dir ./data/lang_char --text ./data/lang_char/text