minor updates

This commit is contained in:
JinZr 2024-10-07 23:32:03 +08:00
parent 32a7d2222d
commit f0744877a6
5 changed files with 9 additions and 6 deletions

View File

@@ -0,0 +1 @@
+../../../librispeech/ASR/local/compile_hlg.py

View File

@@ -0,0 +1 @@
+../../../librispeech/ASR/local/compile_lg.py

View File

@@ -126,25 +126,25 @@ fi
 if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
 log "Stage 5: Train BPE model for normalized text"
-if [ ! -f data/texts ]; then
+if [ ! -f data/text ]; then
 gunzip -c data/manifests/libritts_supervisions_train-clean-100.jsonl.gz \
 | jq ".text" | sed 's/"//g' \
-| ./local/norm_text.py > data/texts
+| ./local/norm_text.py > data/text
 gunzip -c data/manifests/libritts_supervisions_train-clean-360.jsonl.gz \
 | jq ".text" | sed 's/"//g' \
-| ./local/norm_text.py >> data/texts
+| ./local/norm_text.py >> data/text
 gunzip -c data/manifests/libritts_supervisions_train-other-500.jsonl.gz \
 | jq ".text" | sed 's/"//g' \
-| ./local/norm_text.py >> data/texts
+| ./local/norm_text.py >> data/text
 fi
 for vocab_size in ${vocab_sizes[@]}; do
 lang_dir=data/lang_bpe_${vocab_size}
 mkdir -p $lang_dir
-cp data/texts $lang_dir/text
+cp data/text $lang_dir/text
 if [ ! -f $lang_dir/bpe.model ]; then
 ./local/train_bpe_model.py \

View File

@@ -0,0 +1 @@
+../../../librispeech/ASR/zipformer/decoder.py

View File

@@ -1351,8 +1351,8 @@ def run(rank, world_size, args):
return True
train_cuts = train_cuts.filter(remove_short_and_long_utt)
train_cuts = train_cuts.map(normalize_text)
train_cuts = train_cuts.filter(remove_short_and_long_utt)
if params.start_batch > 0 and checkpoints and "sampler" in checkpoints:
# We only load the sampler's state dict when it loads a checkpoint