mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-19 05:54:20 +00:00
Add BPE training
This commit is contained in:
parent
8e560e9880
commit
3c54619f84
1
egs/bengaliai_speech/ASR/local/prepare_lang_bpe.py
Symbolic link
1
egs/bengaliai_speech/ASR/local/prepare_lang_bpe.py
Symbolic link
@ -0,0 +1 @@
|
||||
../../../librispeech/ASR/local/prepare_lang_bpe.py
|
@ -38,8 +38,8 @@ def get_args():
|
||||
|
||||
|
||||
def normalize_text(utt: str) -> str:
|
||||
punc = "~`!#$%^&*()_+-=|';\":/.,?><~·!@#¥%……&*()——+-=“:’;、。,?》《{}"
|
||||
return re.sub(r"[{0}]+".format(punc), "", utt).upper()
|
||||
opr_and_punc = "=\+\-\*\/%<>×÷" + "।,;:\?!'\.\"-\[\]\{\}\(\)–—―~"
|
||||
return re.sub(r"[{0}]+".format(opr_and_punc), "", utt).upper()
|
||||
|
||||
|
||||
def preprocess_bengaliai_speech(
|
||||
@ -109,7 +109,9 @@ def preprocess_bengaliai_speech(
|
||||
|
||||
|
||||
def main():
|
||||
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
|
||||
formatter = (
|
||||
"%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
|
||||
)
|
||||
|
||||
logging.basicConfig(format=formatter, level=logging.INFO)
|
||||
args = get_args()
|
||||
|
1
egs/bengaliai_speech/ASR/local/train_bpe_model.py
Symbolic link
1
egs/bengaliai_speech/ASR/local/train_bpe_model.py
Symbolic link
@ -0,0 +1 @@
|
||||
../../../librispeech/ASR/local/train_bpe_model.py
|
1
egs/bengaliai_speech/ASR/local/validate_bpe_lexicon.py
Symbolic link
1
egs/bengaliai_speech/ASR/local/validate_bpe_lexicon.py
Symbolic link
@ -0,0 +1 @@
|
||||
../../../librispeech/ASR/local/validate_bpe_lexicon.py
|
@ -135,7 +135,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
|
||||
--num-workers $nj \
|
||||
--batch-duration 600 \
|
||||
--start 0 \
|
||||
--num-splits 300
|
||||
--num-splits 233
|
||||
touch data/fbank/.bengaliai_speech_train.done
|
||||
fi
|
||||
fi
|
||||
|
Loading…
x
Reference in New Issue
Block a user