diff --git a/egs/libritts/ASR/local/compile_hlg.py b/egs/libritts/ASR/local/compile_hlg.py new file mode 120000 index 000000000..471aa7fb4 --- /dev/null +++ b/egs/libritts/ASR/local/compile_hlg.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/compile_hlg.py \ No newline at end of file diff --git a/egs/libritts/ASR/local/compile_lg.py b/egs/libritts/ASR/local/compile_lg.py new file mode 120000 index 000000000..462d6d3fb --- /dev/null +++ b/egs/libritts/ASR/local/compile_lg.py @@ -0,0 +1 @@ +../../../librispeech/ASR/local/compile_lg.py \ No newline at end of file diff --git a/egs/libritts/ASR/prepare.sh b/egs/libritts/ASR/prepare.sh index 4b551385f..9d9ce8f87 100755 --- a/egs/libritts/ASR/prepare.sh +++ b/egs/libritts/ASR/prepare.sh @@ -126,25 +126,25 @@ fi if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then log "Stage 5: Train BPE model for normalized text" - if [ ! -f data/texts ]; then + if [ ! -f data/text ]; then gunzip -c data/manifests/libritts_supervisions_train-clean-100.jsonl.gz \ | jq ".text" | sed 's/"//g' \ - | ./local/norm_text.py > data/texts + | ./local/norm_text.py > data/text gunzip -c data/manifests/libritts_supervisions_train-clean-360.jsonl.gz \ | jq ".text" | sed 's/"//g' \ - | ./local/norm_text.py >> data/texts + | ./local/norm_text.py >> data/text gunzip -c data/manifests/libritts_supervisions_train-other-500.jsonl.gz \ | jq ".text" | sed 's/"//g' \ - | ./local/norm_text.py >> data/texts + | ./local/norm_text.py >> data/text fi for vocab_size in ${vocab_sizes[@]}; do lang_dir=data/lang_bpe_${vocab_size} mkdir -p $lang_dir - cp data/texts $lang_dir/text + cp data/text $lang_dir/text if [ ! -f $lang_dir/bpe.model ]; then ./local/train_bpe_model.py \ diff --git a/egs/libritts/ASR/zipformer/decoder.py b/egs/libritts/ASR/zipformer/decoder.py new file mode 120000 index 000000000..5a8018680 --- /dev/null +++ b/egs/libritts/ASR/zipformer/decoder.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/decoder.py \ No newline at end of file diff --git a/egs/libritts/ASR/zipformer/train.py b/egs/libritts/ASR/zipformer/train.py index 98bbafc4a..0fa32d7f6 100755 --- a/egs/libritts/ASR/zipformer/train.py +++ b/egs/libritts/ASR/zipformer/train.py @@ -1351,8 +1351,8 @@ def run(rank, world_size, args): return True - train_cuts = train_cuts.filter(remove_short_and_long_utt) train_cuts = train_cuts.map(normalize_text) + train_cuts = train_cuts.filter(remove_short_and_long_utt) if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: # We only load the sampler's state dict when it loads a checkpoint