mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-06 07:34:21 +00:00
Update prepare_lm_training_data.py
This commit is contained in:
parent
1a11440014
commit
94f963baf8
@ -92,7 +92,6 @@ def main():
|
|||||||
word2bpe = [] # Will be a list-of-list-of-int, representing BPE pieces.
|
word2bpe = [] # Will be a list-of-list-of-int, representing BPE pieces.
|
||||||
sentences = [] # Will be a list-of-list-of-int, representing word-ids.
|
sentences = [] # Will be a list-of-list-of-int, representing word-ids.
|
||||||
|
|
||||||
num_lines_in_total = None
|
|
||||||
step = 500000
|
step = 500000
|
||||||
|
|
||||||
processed = 0
|
processed = 0
|
||||||
@ -105,10 +104,7 @@ def main():
|
|||||||
break
|
break
|
||||||
|
|
||||||
if step and processed % step == 0:
|
if step and processed % step == 0:
|
||||||
logging.info(
|
logging.info(f"Processed number of lines: {processed} ")
|
||||||
f"Processed number of lines: {processed} "
|
|
||||||
f"({processed/num_lines_in_total*100: .3f}%)"
|
|
||||||
)
|
|
||||||
processed += 1
|
processed += 1
|
||||||
|
|
||||||
line_words = line.split()
|
line_words = line.split()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user