mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-13 12:02:21 +00:00
fix prepare.sh
This commit is contained in:
parent
77560cd5e8
commit
de469c0b65
@ -132,11 +132,12 @@ fi
|
|||||||
|
|
||||||
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
||||||
log "Stage 4: Extract speech tokens."
|
log "Stage 4: Extract speech tokens."
|
||||||
|
mkdir -p $tokens_dir
|
||||||
for subset in small medium large; do
|
for subset in small medium large; do
|
||||||
log "Extract speech tokens for subset: $subset"
|
if [ ! -e $tokens_dir/libriheavy_${subset}.jsonl.gz ]; then
|
||||||
output_dir=$tokens_dir/libriheavy_${subset}
|
echo $tokens_dir/libriheavy_${subset}.jsonl.gz
|
||||||
mkdir -p $tokens_dir
|
log "Extract speech tokens for subset: $subset"
|
||||||
if [ ! -e $tokens_dir/.extract_completed ]; then
|
output_dir=$tokens_dir/libriheavy_${subset}
|
||||||
torchrun --nproc_per_node=8 \
|
torchrun --nproc_per_node=8 \
|
||||||
--nnodes=1 \
|
--nnodes=1 \
|
||||||
--rdzv_id=2024 \
|
--rdzv_id=2024 \
|
||||||
@ -148,8 +149,8 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
|||||||
--output_dir $output_dir \
|
--output_dir $output_dir \
|
||||||
--batch_size 32 \
|
--batch_size 32 \
|
||||||
--model "speech_tokenizer_v1"
|
--model "speech_tokenizer_v1"
|
||||||
|
|
||||||
cat $output_dir/part* | gzip > $output_dir/libriheavy_${subset}.jsonl.gz && rm -rf $output_dir
|
cat $output_dir/part* | gzip > $output_dir/libriheavy_${subset}.jsonl.gz && rm -rf $output_dir
|
||||||
touch $output_dir/.extract_completed
|
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
|
Loading…
x
Reference in New Issue
Block a user