mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-11 06:55:27 +00:00
update
This commit is contained in:
parent
282491429a
commit
3acbc2d44b
@ -87,7 +87,7 @@ def compute_fbank_commonvoice_splits(args):
|
|||||||
output_dir = Path(output_dir)
|
output_dir = Path(output_dir)
|
||||||
assert output_dir.exists(), f"{output_dir} does not exist!"
|
assert output_dir.exists(), f"{output_dir} does not exist!"
|
||||||
|
|
||||||
num_digits = len(str(num_splits))
|
num_digits = 8
|
||||||
|
|
||||||
start = args.start
|
start = args.start
|
||||||
stop = args.stop
|
stop = args.stop
|
||||||
|
|||||||
@ -173,7 +173,10 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
|
|||||||
if [ ! -f $lang_dir/transcript_words.txt ]; then
|
if [ ! -f $lang_dir/transcript_words.txt ]; then
|
||||||
log "Generate data for BPE training"
|
log "Generate data for BPE training"
|
||||||
file=$(
|
file=$(
|
||||||
find "data/fbank/peoples_speech_cuts_train.jsonl.gz"
|
find "data/fbank/peoples_speech_cuts_dirty_raw.jsonl.gz"
|
||||||
|
find "data/fbank/peoples_speech_cuts_dirty_sa_raw.jsonl.gz"
|
||||||
|
find "data/fbank/peoples_speech_cuts_clean_raw.jsonl.gz"
|
||||||
|
find "data/fbank/peoples_speech_cuts_clean_sa_raw.jsonl.gz"
|
||||||
)
|
)
|
||||||
gunzip -c ${file} | awk -F '"' '{print $30}' > $lang_dir/transcript_words.txt
|
gunzip -c ${file} | awk -F '"' '{print $30}' > $lang_dir/transcript_words.txt
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user