From 4d7eefb02d201ee5c6cc5d7920bed9d3ed77ca22 Mon Sep 17 00:00:00 2001
From: sathvik udupa <sathvikudupa66@gmail.com>
Date: Mon, 26 Jun 2023 15:08:40 +0530
Subject: [PATCH] add results

---
 egs/mucs/ASR/RESULTS.md | 74 +++++++++++++++++++++++++++++++++++++++++
 egs/mucs/ASR/run.sh     | 20 ++++++-----
 2 files changed, 85 insertions(+), 9 deletions(-)
 create mode 100644 egs/mucs/ASR/RESULTS.md

diff --git a/egs/mucs/ASR/RESULTS.md b/egs/mucs/ASR/RESULTS.md
new file mode 100644
index 000000000..4c0d3291d
--- /dev/null
+++ b/egs/mucs/ASR/RESULTS.md
@@ -0,0 +1,74 @@
+# Results for mucs hi-en and bn-en
+
+This page reports the WERs on the code-switched test sets of the MUCS hi-en and bn-en corpora.
+
+## using conformer ctc
+
+The following results were obtained with `run.sh`.
+
+Specify the language pair through the `dataset` variable (`hi-en` or `bn-en`).
+The LM is trained with KenLM on the training corpus.
+
+Here are the results with different decoding methods
+
+bn-en
+|                         | test  |
+|-------------------------|-------|
+| ctc-decoding            | 31.72 |
+| 1best                   | 28.05 |
+| nbest                   | 27.92 |
+| nbest-rescoring         | 27.22 |
+| whole-lattice-rescoring | 27.24 |
+| attention-decoder       | 26.46 |
+
+hi-en
+|                         | test  |
+|-------------------------|-------|
+| ctc-decoding            | 31.43 |
+| 1best                   | 28.48 |
+| nbest                   | 28.55 |
+| nbest-rescoring         | 28.23 |
+| whole-lattice-rescoring | 28.77 |
+| attention-decoder       | 28.16 |
+
+The training command for reproducing these results is given below:
+```bash
+cd egs/mucs/ASR/
+./prepare.sh
+
+dataset="hi-en" #hi-en or bn-en
+bpe=400
+datadir=data_"$dataset"
+./conformer_ctc/train.py \
+    --num-epochs 60 \
+    --max-duration 300 \
+    --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \
+    --manifest-dir $datadir/fbank \
+    --lang-dir $datadir/lang_bpe_"$bpe" \
+    --enable-musan False
+```
+
+The decoding command is given below:
+```bash
+dataset="hi-en" #hi-en or bn-en
+bpe=400
+datadir=data_"$dataset"
+num_paths=10
+max_duration=10
+decode_methods="attention-decoder 1best nbest nbest-rescoring ctc-decoding whole-lattice-rescoring"
+
+for decode_method in $decode_methods; 
+do
+    ./conformer_ctc/decode.py \
+        --epoch 59 \
+        --avg 10 \
+        --manifest-dir $datadir/fbank \
+        --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \
+        --max-duration $max_duration \
+        --lang-dir $datadir/lang_bpe_"$bpe" \
+        --lm-dir $datadir/"lm" \
+        --method $decode_method \
+        --num-paths $num_paths
+
+done
+```
\ No newline at end of file
diff --git a/egs/mucs/ASR/run.sh b/egs/mucs/ASR/run.sh
index cafb0aa04..4739b7850 100755
--- a/egs/mucs/ASR/run.sh
+++ b/egs/mucs/ASR/run.sh
@@ -5,17 +5,19 @@ set -e
 dataset='hi-en'
 datadir=data_"$dataset"
 bpe=400
-decode_methods="attention-decoder 1best nbest ctc-decoding whole-lattice-rescoring"
-num_paths=20
+# decode_methods="attention-decoder 1best nbest nbest-rescoring ctc-decoding whole-lattice-rescoring"
+decode_methods="nbest nbest-rescoring whole-lattice-rescoring"
+
+num_paths=10
 max_duration=5
 
-./conformer_ctc/train.py \
-    --num-epochs 60 \
-    --max-duration 300 \
-    --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \
-    --manifest-dir $datadir/fbank \
-    --lang-dir $datadir/lang_bpe_"$bpe" \
-    --enable-musan False \
+# ./conformer_ctc/train.py \
+#     --num-epochs 60 \
+#     --max-duration 300 \
+#     --exp-dir ./conformer_ctc/exp_"$dataset"_bpe"$bpe" \
+#     --manifest-dir $datadir/fbank \
+#     --lang-dir $datadir/lang_bpe_"$bpe" \
+#     --enable-musan False \
 
 for decode_method in $decode_methods; 
 do