mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-11 06:55:27 +00:00
update
This commit is contained in:
parent
282491429a
commit
3acbc2d44b
@ -87,7 +87,7 @@ def compute_fbank_commonvoice_splits(args):
|
||||
output_dir = Path(output_dir)
|
||||
assert output_dir.exists(), f"{output_dir} does not exist!"
|
||||
|
||||
num_digits = len(str(num_splits))
|
||||
num_digits = 8
|
||||
|
||||
start = args.start
|
||||
stop = args.stop
|
||||
|
||||
@ -173,7 +173,10 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
|
||||
if [ ! -f $lang_dir/transcript_words.txt ]; then
|
||||
log "Generate data for BPE training"
|
||||
file=$(
|
||||
find "data/fbank/peoples_speech_cuts_train.jsonl.gz"
|
||||
find "data/fbank/peoples_speech_cuts_dirty_raw.jsonl.gz"
|
||||
find "data/fbank/peoples_speech_cuts_dirty_sa_raw.jsonl.gz"
|
||||
find "data/fbank/peoples_speech_cuts_clean_raw.jsonl.gz"
|
||||
find "data/fbank/peoples_speech_cuts_clean_sa_raw.jsonl.gz"
|
||||
)
|
||||
gunzip -c ${file} | awk -F '"' '{print $30}' > $lang_dir/transcript_words.txt
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user