mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
separate transcript prep stage from bpe train stage
This commit is contained in:
parent
6e81d9aa5b
commit
e69e1c04b2
@@ -45,12 +45,15 @@ mkdir -p data/lang
|
||||
lang_dir=data/lang
|
||||
|
||||
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
log "Stage 1: Prepare BPE tokenizer"
|
||||
|
||||
log "Stage 1: Prepare transcript for BPE training"
|
||||
if [ ! -f $lang_dir/transcript.txt ]; then
|
||||
log "Generating transcripts for BPE training"
|
||||
./local/utils/generate_transcript.py --lang-dir $lang_dir
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
log "Stage 2: Prepare BPE tokenizer"
|
||||
|
||||
for vocab_size in ${vocab_sizes[@]}; do
|
||||
log "Training BPE model with vocab_size=${vocab_size}"
|
||||
|
Loading…
x
Reference in New Issue
Block a user