From 47e49a6663d268813357404324a5c59ff5e398b2 Mon Sep 17 00:00:00 2001
From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com>
Date: Tue, 15 Feb 2022 12:33:51 +0800
Subject: [PATCH] change transcript_words.txt

---
 egs/tedlium3/ASR/prepare.sh | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/egs/tedlium3/ASR/prepare.sh b/egs/tedlium3/ASR/prepare.sh
index 55b0f8d29..053cc3941 100644
--- a/egs/tedlium3/ASR/prepare.sh
+++ b/egs/tedlium3/ASR/prepare.sh
@@ -121,7 +121,12 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
       --lang-dir $lang_dir \
       --manifests-dir data/manifests
 
-    cat download/tedlium3/TEDLIUM.152k.dic | grep -v -w "<s>" | grep -v -w "</s>" | grep -v -w "<unk>" | LANG= LC_ALL= sort | sed 's:([0-9])::g' > $lang_dir/lexicon_words.txt
+    cat download/tedlium3/TEDLIUM.152k.dic |
+    grep -v -w "<s>" |
+    grep -v -w "</s>" |
+    grep -v -w "<unk>" |
+    LANG= LC_ALL= sort |
+    sed 's:([0-9])::g' > $lang_dir/lexicon_words.txt
 
     (echo '<UNK> <UNK>'; ) |
       cat - $lang_dir/lexicon_words.txt |
@@ -146,6 +151,9 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
     log "Generate data for BPE training"
     cat data/lang_phone/train.text |
       cut -d " " -f 2- > $lang_dir/transcript_words.txt
+    sed -i 's/ <unk>//g' $lang_dir/transcript_words.txt
+    sed -i 's/<unk> //g' $lang_dir/transcript_words.txt
+    sed -i 's/<unk>//g' $lang_dir/transcript_words.txt
   fi
 
   ./local/train_bpe_model.py \