mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 18:24:18 +00:00
fix index error
This commit is contained in:
parent
6fd14d202b
commit
be001a896c
@@ -137,8 +137,8 @@ def compute_fbank_kespeech_splits(args):
|
||||
set_audio_duration_mismatch_tolerance(0.01) # 10ms tolerance
|
||||
set_caching_enabled(False)
|
||||
for i in range(start, stop):
|
||||
idx = f"{i + 1}".zfill(num_digits)
|
||||
logging.info(f"Processing {idx}/{num_splits}")
|
||||
idx = f"{i}".zfill(num_digits)
|
||||
logging.info(f"Processing {i+1}/{num_splits}")
|
||||
|
||||
cuts_path = output_dir / f"kespeech-asr_cuts_{subset}.{idx}.jsonl.gz"
|
||||
if cuts_path.is_file():
|
||||
|
@@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
stage=120
|
||||
stop_stage=120
|
||||
stage=121
|
||||
stop_stage=121
|
||||
num_splits=100
|
||||
|
||||
dl_dir=$PWD/download
|
||||
@@ -274,7 +274,7 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
|
||||
touch data/fbank/.kespeech_preprocess_complete
|
||||
fi
|
||||
|
||||
if [ -f data/fbank/.kespeech.train_phase1.split.${num_splits}.done ]; then
|
||||
if [ ! -f data/fbank/.kespeech.train_phase1.split.${num_splits}.done ]; then
|
||||
log "Spliting KeSpeech train_phase1"
|
||||
lhotse split ${num_splits} \
|
||||
data/fbank/kespeech/kespeech-asr_cuts_train_phase1_raw.jsonl.gz \
|
||||
@@ -282,7 +282,7 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
|
||||
touch data/fbank/.kespeech.train_phase1.split.${num_splits}.done
|
||||
fi
|
||||
|
||||
if [ -f data/fbank/.kespeech.train_phase2.split.${num_splits}.done ]; then
|
||||
if [ ! -f data/fbank/.kespeech.train_phase2.split.${num_splits}.done ]; then
|
||||
log "Spliting KeSpeech train_phase2"
|
||||
lhotse split ${num_splits} \
|
||||
data/fbank/kespeech/kespeech-asr_cuts_train_phase2_raw.jsonl.gz \
|
||||
@@ -327,7 +327,7 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
|
||||
touch data/fbank/.kespeech_preprocess_complete
|
||||
fi
|
||||
|
||||
if [ -f data/fbank/.kespeech.train_phase1.split.${num_splits}.done ]; then
|
||||
if [ ! -f data/fbank/.kespeech.train_phase1.split.${num_splits}.done ]; then
|
||||
log "Spliting KeSpeech train_phase1"
|
||||
lhotse split ${num_splits} \
|
||||
data/fbank/kespeech/kespeech-asr_cuts_train_phase1_raw.jsonl.gz \
|
||||
@@ -335,7 +335,7 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
|
||||
touch data/fbank/.kespeech.train_phase1.split.${num_splits}.done
|
||||
fi
|
||||
|
||||
if [ -f data/fbank/.kespeech.train_phase2.split.${num_splits}.done ]; then
|
||||
if [ ! -f data/fbank/.kespeech.train_phase2.split.${num_splits}.done ]; then
|
||||
log "Spliting KeSpeech train_phase2"
|
||||
lhotse split ${num_splits} \
|
||||
data/fbank/kespeech/kespeech-asr_cuts_train_phase2_raw.jsonl.gz \
|
||||
@@ -356,6 +356,21 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
|
||||
log "Stage 121: tmp"
|
||||
log "Compute KeSpeech fbank for train_phase1"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --stop 1 --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
|
||||
log "Compute KeSpeech fbank for train_phase2"
|
||||
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
|
||||
log "Compute KeSpeech fbank for test/dev"
|
||||
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
|
||||
|
||||
touch data/fbank/.kespeech.done
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
|
||||
log "Stage 13: BPE model training (note that we use transcripts of wenetspeech only for BPE training)"
|
||||
|
@@ -150,8 +150,8 @@ def compute_fbank_wenetspeech_splits(args):
|
||||
set_caching_enabled(False)
|
||||
#with get_executor() as ex: # Initialize the executor only once.
|
||||
for i in range(start, stop):
|
||||
idx = f"{i + 1}".zfill(num_digits)
|
||||
logging.info(f"Processing {idx}/{num_splits}")
|
||||
idx = f"{i}".zfill(num_digits)
|
||||
logging.info(f"Processing {i+1}/{num_splits}")
|
||||
|
||||
cuts_path = output_dir / f"cuts_{subset}.{idx}.jsonl.gz"
|
||||
if cuts_path.is_file():
|
||||
|
Loading…
x
Reference in New Issue
Block a user