small fix

This commit is contained in:
Yifan Yang 2024-09-07 23:41:15 +08:00
parent 450d05d666
commit b35924f361
2 changed files with 17 additions and 7 deletions

View File

@@ -6,9 +6,9 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail set -eou pipefail
nj=15 nj=15
# run step 0 to step 5 by default # run step 0 to step 4 by default
stage=0 stage=0
stop_stage=5 stop_stage=4
# We assume dl_dir (download dir) contains the following # We assume dl_dir (download dir) contains the following
# directories and files. If not, they will be downloaded # directories and files. If not, they will be downloaded
@@ -79,9 +79,16 @@ fi
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
log "Stage 4: Extract SSL target for librilight" log "Stage 4: Extract SSL target for librilight"
mkdir -p data/fbank if [ ! -e data/kmeans/.extract_small.done ]; then
if [ ! -e data/fbank/.librispeech.done ]; then ./local/extract_kmeans_from_hubert_base.py --subset small
./local/compute_fbank_librispeech.py touch data/kmeans/.extract_small.done
touch data/fbank/.librispeech.done fi
if [ ! -e data/kmeans/.extract_medium.done ]; then
./local/extract_kmeans_from_hubert_base.py --subset medium
touch data/kmeans/.extract_medium.done
fi
if [ ! -e data/kmeans/.extract_large.done ]; then
./local/extract_kmeans_from_hubert_base.py --subset large
touch data/kmeans/.extract_large.done
fi fi
fi fi

View File

@@ -953,7 +953,10 @@ def train_one_epoch(
scheduler.step_batch(params.batch_idx_train) scheduler.step_batch(params.batch_idx_train)
# Use the number of hours of speech to adjust the learning rate # Use the number of hours of speech to adjust the learning rate
scheduler.step_epoch( scheduler.step_epoch(
params.batch_idx_train * params.max_duration * params.world_size / 3600 params.batch_idx_train
* params.max_duration
* params.world_size
/ 3600
) )
scaler.step(optimizer) scaler.step(optimizer)