mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-27 02:34:21 +00:00
add speed perturb for kespeech
This commit is contained in:
parent
5a62723f19
commit
73e5caecc5
@ -78,9 +78,9 @@ fi
|
|||||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||||
log "Stage 2: Compute fbank for aishell4"
|
log "Stage 2: Compute fbank for aishell4"
|
||||||
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
|
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
|
||||||
mkdir -p data/fbank/aishell4
|
mkdir -p data/fbank
|
||||||
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
|
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
|
||||||
touch data/fbank/aishell4/.fbank.done
|
touch data/fbank/.fbank.done
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -88,9 +88,9 @@ whisper_mel_bins=80
|
|||||||
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
|
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
|
||||||
log "Stage 20: Compute whisper fbank for aishell4"
|
log "Stage 20: Compute whisper fbank for aishell4"
|
||||||
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
|
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
|
||||||
mkdir -p data/fbank/aishell4
|
mkdir -p data/fbank
|
||||||
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||||
touch data/fbank/aishell4/.fbank.done
|
touch data/fbank/.fbank.done
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False
|
|||||||
"test",
|
"test",
|
||||||
)
|
)
|
||||||
|
|
||||||
prefix = "alimeeting"
|
prefix = "alimeeting-far"
|
||||||
suffix = "jsonl.gz"
|
suffix = "jsonl.gz"
|
||||||
manifests = read_manifests_if_cached(
|
manifests = read_manifests_if_cached(
|
||||||
dataset_parts=dataset_parts,
|
dataset_parts=dataset_parts,
|
||||||
|
@ -67,18 +67,20 @@ fi
|
|||||||
|
|
||||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||||
log "Stage 2: compute fbank for alimeeting"
|
log "Stage 2: compute fbank for alimeeting"
|
||||||
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
|
if [ ! -f data/fbank/.fbank.done ]; then
|
||||||
mkdir -p data/fbank/alimeeting
|
mkdir -p data/fbank
|
||||||
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed}
|
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed}
|
||||||
|
touch data/fbank/.fbank.done
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
whisper_mel_bins=80
|
whisper_mel_bins=80
|
||||||
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
|
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
|
||||||
log "Stage 20: compute whisper fbank for alimeeting"
|
log "Stage 20: compute whisper fbank for alimeeting"
|
||||||
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
|
if [ ! -f data/fbank/.fbank.done ]; then
|
||||||
mkdir -p data/fbank/alimeeting
|
mkdir -p data/fbank
|
||||||
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||||
|
touch data/fbank/.fbank.done
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
|||||||
|
|
||||||
set -eou pipefail
|
set -eou pipefail
|
||||||
|
|
||||||
stage=121
|
stage=120
|
||||||
stop_stage=121
|
stop_stage=120
|
||||||
num_splits=100
|
num_splits=100
|
||||||
|
|
||||||
dl_dir=$PWD/download
|
dl_dir=$PWD/download
|
||||||
@ -86,7 +86,7 @@ fi
|
|||||||
log "Dataset: AISHELL-2"
|
log "Dataset: AISHELL-2"
|
||||||
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
||||||
log "Stage 4: Prepare AISHELL-2"
|
log "Stage 4: Prepare AISHELL-2"
|
||||||
if [ -e ../../aishell/ASR/data/fbank/.aishell2.done ]; then
|
if [ -e ../../aishell2/ASR/data/fbank/.aishell2.done ]; then
|
||||||
cd data/fbank
|
cd data/fbank
|
||||||
ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_feats_train) .
|
ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_feats_train) .
|
||||||
ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_feats_dev) .
|
ln -svf $(realpath ../../../../aishell2/ASR/data/fbank/aishell2_feats_dev) .
|
||||||
@ -104,7 +104,7 @@ fi
|
|||||||
log "Dataset: AISHELL-4"
|
log "Dataset: AISHELL-4"
|
||||||
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
|
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
|
||||||
log "Stage 5: Prepare AISHELL-4"
|
log "Stage 5: Prepare AISHELL-4"
|
||||||
if [ -e ../../aishell/ASR/data/fbank/.aishell4.done ]; then
|
if [ -e ../../aishell4/ASR/data/fbank/.fbank.done ]; then
|
||||||
cd data/fbank
|
cd data/fbank
|
||||||
ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_feats_train) .
|
ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_feats_train) .
|
||||||
ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_feats_dev) .
|
ln -svf $(realpath ../../../../aishell4/ASR/data/fbank/aishell4_feats_dev) .
|
||||||
@ -323,7 +323,7 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
|
|||||||
|
|
||||||
log "Preprocess KeSpeech manifest"
|
log "Preprocess KeSpeech manifest"
|
||||||
if [ ! -f data/fbank/.kespeech_preprocess_complete ]; then
|
if [ ! -f data/fbank/.kespeech_preprocess_complete ]; then
|
||||||
python3 ./local/preprocess_kespeech.py
|
python3 ./local/preprocess_kespeech.py --speed-perturb true
|
||||||
touch data/fbank/.kespeech_preprocess_complete
|
touch data/fbank/.kespeech_preprocess_complete
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -350,20 +350,12 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
|
|||||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||||
|
|
||||||
log "Compute KeSpeech fbank for test/dev"
|
log "Compute KeSpeech fbank for test/dev"
|
||||||
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
# ./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||||
|
|
||||||
touch data/fbank/.kespeech.done
|
touch data/fbank/.kespeech.done
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $stage -le 122 ] && [ $stop_stage -ge 122 ]; then
|
|
||||||
log "Stage 122: Prepare speed perturb versionKeSpeech for whisper"
|
|
||||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
|
||||||
|
|
||||||
log "Compute KeSpeech fbank for train_phase2"
|
|
||||||
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
|
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
|
||||||
log "Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper"
|
log "Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user