Loop over different decode methods

This commit is contained in:
sathvik udupa 2023-05-03 12:25:49 +05:30
parent 66fb375dbb
commit 996554acde
3 changed files with 30 additions and 22 deletions

View File

@ -682,7 +682,7 @@ def main():
): ):
if not (params.lm_dir / "G_4_gram.pt").is_file(): if not (params.lm_dir / "G_4_gram.pt").is_file():
logging.info("Loading G_4_gram.fst.txt") logging.info("Loading G_4_gram.fst.txt")
logging.warning("It may take 8 minutes.") logging.warning("It may take few minutes.")
with open(params.lm_dir / "G_4_gram.fst.txt") as f: with open(params.lm_dir / "G_4_gram.fst.txt") as f:
first_word_disambig_id = lexicon.word_table["#0"] first_word_disambig_id = lexicon.word_table["#0"]

View File

@ -6,7 +6,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail set -eou pipefail
nj=60 nj=60
stage=9 stage=-1
stop_stage=9 stop_stage=9
# We assume dl_dir (download dir) contains the following # We assume dl_dir (download dir) contains the following
@ -18,7 +18,8 @@ dl_dir=$PWD/download
mkdir -p $dl_dir mkdir -p $dl_dir
raw_data_path="/data/Database/MUCS/" raw_data_path="/data/Database/MUCS/"
dataset="bn-en" #hin-en or bn-en dataset="hi-en" #hin-en or bn-en
datadir="data_"$dataset datadir="data_"$dataset
raw_kaldi_files_path=$dl_dir/$dataset/ raw_kaldi_files_path=$dl_dir/$dataset/

View File

@ -2,29 +2,36 @@
export CUDA_VISIBLE_DEVICES="0" export CUDA_VISIBLE_DEVICES="0"
set -e set -e
dataset='bn-en' dataset='hi-en'
datadir=data_"$dataset" datadir=data_"$dataset"
bpe=400 bpe=400
decode_method="nbest" decode_methods="attention-decoder 1best nbest ctc-decoding whole-lattice-rescoring"
num_paths=20 num_paths=20
max_duration=5
# ./conformer_ctc/train.py \ ./conformer_ctc/train.py \
# --num-epochs 60 \ --num-epochs 60 \
# --max-duration 300 \ --max-duration 300 \
# --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \
# --manifest-dir $datadir/fbank \
# --lang-dir $datadir/lang_bpe_"$bpe" \
# --enable-musan False \
./conformer_ctc/decode.py \
--epoch 59 \
--avg 10 \
--manifest-dir $datadir/fbank \
--exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \ --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \
--max-duration 5 \ --manifest-dir $datadir/fbank \
--lang-dir $datadir/lang_bpe_"$bpe" \ --lang-dir $datadir/lang_bpe_"$bpe" \
--lm-dir $datadir/"lm" \ --enable-musan False \
--method $decode_method \
--num-paths $num_paths \ for decode_method in $decode_methods;
do
./conformer_ctc/decode.py \
--epoch 59 \
--avg 10 \
--manifest-dir $datadir/fbank \
--exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \
--max-duration $max_duration \
--lang-dir $datadir/lang_bpe_"$bpe" \
--lm-dir $datadir/"lm" \
--method $decode_method \
--num-paths $num_paths \
done
exit