diff --git a/egs/mucs/ASR/conformer_ctc/decode.py b/egs/mucs/ASR/conformer_ctc/decode.py index 812e5db34..112d46f76 100755 --- a/egs/mucs/ASR/conformer_ctc/decode.py +++ b/egs/mucs/ASR/conformer_ctc/decode.py @@ -682,7 +682,7 @@ def main(): ): if not (params.lm_dir / "G_4_gram.pt").is_file(): logging.info("Loading G_4_gram.fst.txt") - logging.warning("It may take 8 minutes.") + logging.warning("It may take few minutes.") with open(params.lm_dir / "G_4_gram.fst.txt") as f: first_word_disambig_id = lexicon.word_table["#0"] diff --git a/egs/mucs/ASR/prepare.sh b/egs/mucs/ASR/prepare.sh index 44bbd8746..40fa7ffc5 100755 --- a/egs/mucs/ASR/prepare.sh +++ b/egs/mucs/ASR/prepare.sh @@ -6,7 +6,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python set -eou pipefail nj=60 -stage=9 +stage=-1 stop_stage=9 # We assume dl_dir (download dir) contains the following @@ -18,7 +18,8 @@ dl_dir=$PWD/download mkdir -p $dl_dir raw_data_path="/data/Database/MUCS/" -dataset="bn-en" #hin-en or bn-en +dataset="hi-en" #hi-en or bn-en + datadir="data_"$dataset raw_kaldi_files_path=$dl_dir/$dataset/ diff --git a/egs/mucs/ASR/run.sh b/egs/mucs/ASR/run.sh index 65dcea1f3..cafb0aa04 100755 --- a/egs/mucs/ASR/run.sh +++ b/egs/mucs/ASR/run.sh @@ -2,29 +2,36 @@ export CUDA_VISIBLE_DEVICES="0" set -e -dataset='bn-en' +dataset='hi-en' datadir=data_"$dataset" bpe=400 -decode_method="nbest" +decode_methods="attention-decoder 1best nbest ctc-decoding whole-lattice-rescoring" num_paths=20 +max_duration=5 -# ./conformer_ctc/train.py \ -# --num-epochs 60 \ -# --max-duration 300 \ -# --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \ -# --manifest-dir $datadir/fbank \ -# --lang-dir $datadir/lang_bpe_"$bpe" \ -# --enable-musan False \ - - -./conformer_ctc/decode.py \ - --epoch 59 \ - --avg 10 \ - --manifest-dir $datadir/fbank \ +./conformer_ctc/train.py \ + --num-epochs 60 \ + --max-duration 300 \ --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \ - --max-duration 5 \ + --manifest-dir $datadir/fbank \ 
--lang-dir $datadir/lang_bpe_"$bpe" \ - --lm-dir $datadir/"lm" \ - --method $decode_method \ - --num-paths $num_paths \ + --enable-musan False \ + +for decode_method in $decode_methods; +do + + ./conformer_ctc/decode.py \ + --epoch 59 \ + --avg 10 \ + --manifest-dir $datadir/fbank \ + --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \ + --max-duration $max_duration \ + --lang-dir $datadir/lang_bpe_"$bpe" \ + --lm-dir $datadir/"lm" \ + --method $decode_method \ + --num-paths $num_paths \ + +done +exit + \ No newline at end of file