small fix

2025-08-10 18:42:19 +00:00 · 2024-09-07 23:41:15 +08:00 · 2024-09-07 23:41:15 +08:00 · b35924f361
commit b35924f361
parent 450d05d666
2 changed files with 17 additions and 7 deletions
--- a/egs/librilight/SSL/prepare.sh
+++ b/egs/librilight/SSL/prepare.sh
@@ -6,9 +6,9 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 set -eou pipefail
 nj=15
-# run step 0 to step 5 by default
+# run step 0 to step 4 by default
 stage=0
-stop_stage=5
+stop_stage=4
 # We assume dl_dir (download dir) contains the following
 # directories and files. If not, they will be downloaded
@@ -79,9 +79,16 @@ fi
 if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
  log "Stage 4: Extract SSL target for librilight"
-  mkdir -p data/fbank
+  if [ ! -e data/kmeans/.extract_small.done ]; then
-  if [ ! -e data/fbank/.librispeech.done ]; then
+    ./local/extract_kmeans_from_hubert_base.py --subset small
-    ./local/compute_fbank_librispeech.py
+    touch data/kmeans/.extract_small.done
-    touch data/fbank/.librispeech.done
   fi
+  if [ ! -e data/kmeans/.extract_medium.done ]; then
+    ./local/extract_kmeans_from_hubert_base.py --subset medium
+    touch data/kmeans/.extract_medium.done
+  fi
+  if [ ! -e data/kmeans/.extract_large.done ]; then
+    ./local/extract_kmeans_from_hubert_base.py --subset large
+    touch data/kmeans/.extract_large.done
+  fi
 fi
--- a/egs/librilight/SSL/zipformer/pretrain.py
+++ b/egs/librilight/SSL/zipformer/pretrain.py
@@ -953,7 +953,10 @@ def train_one_epoch(
                scheduler.step_batch(params.batch_idx_train)
                # Use the number of hours of speech to adjust the learning rate
                scheduler.step_epoch(
-                    params.batch_idx_train * params.max_duration * params.world_size / 3600
+                    params.batch_idx_train
+                    * params.max_duration
+                    * params.world_size
+                    / 3600
                )
                scaler.step(optimizer)