From 2e37b29e66f52813707250f754224a447fe4ac10 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Mon, 23 Aug 2021 13:57:46 +0800
Subject: [PATCH] Disable SpecAug for yesno.

Also replace AdamW with SGD and raise the learning rate from 1e-3 to 1e-1.
---
 .github/workflows/run-yesno-recipe.yml | 18 +++++-------------
 egs/yesno/ASR/tdnn/asr_datamodule.py   | 12 ------------
 egs/yesno/ASR/tdnn/decode.py           |  2 +-
 egs/yesno/ASR/tdnn/train.py            |  7 +++----
 4 files changed, 9 insertions(+), 30 deletions(-)

diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/run-yesno-recipe.yml
index 7e36139a3..2c9b59aba 100644
--- a/.github/workflows/run-yesno-recipe.yml
+++ b/.github/workflows/run-yesno-recipe.yml
@@ -69,21 +69,13 @@ jobs:
         run: |
           export PYTHONPATH=$PWD:$PYTHONPATH
           echo $PYTHONPATH
-          ls -lh
 
-          # The following three lines are for macOS
-          lib_path=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")
-          echo "lib_path: $lib_path"
-          export DYLD_LIBRARY_PATH=$lib_path:$DYLD_LIBRARY_PATH
-          ls -lh $lib_path
 
           cd egs/yesno/ASR
           ./prepare.sh
-          python3 ./tdnn/train.py --num-epochs 100
-          python3 ./tdnn/decode.py --epoch 99
-          python3 ./tdnn/decode.py --epoch 95
-          python3 ./tdnn/decode.py --epoch 90
-          python3 ./tdnn/decode.py --epoch 80
-          python3 ./tdnn/decode.py --epoch 70
-          python3 ./tdnn/decode.py --epoch 60
+          python3 ./tdnn/train.py
+          python3 ./tdnn/decode.py --avg 2
+          python3 ./tdnn/decode.py --avg 3
+          python3 ./tdnn/decode.py --avg 4
+          python3 ./tdnn/decode.py --avg 5
           # TODO: Check that the WER is less than some value
diff --git a/egs/yesno/ASR/tdnn/asr_datamodule.py b/egs/yesno/ASR/tdnn/asr_datamodule.py
index 8b2b44c8a..e6614e3ce 100644
--- a/egs/yesno/ASR/tdnn/asr_datamodule.py
+++ b/egs/yesno/ASR/tdnn/asr_datamodule.py
@@ -27,7 +27,6 @@ from lhotse.dataset import (
     K2SpeechRecognitionDataset,
     PrecomputedFeatures,
     SingleCutSampler,
-    SpecAugment,
 )
 from lhotse.dataset.input_strategies import OnTheFlyFeatures
 from torch.utils.data import DataLoader
@@ -163,18 +162,8 @@ class YesNoAsrDataModule(DataModule):
                 )
             ] + transforms
 
-        input_transforms = [
-            SpecAugment(
-                num_frame_masks=2,
-                features_mask_size=27,
-                num_feature_masks=2,
-                frames_mask_size=100,
-            )
-        ]
-
         train = K2SpeechRecognitionDataset(
             cut_transforms=transforms,
-            input_transforms=input_transforms,
             return_cuts=self.args.return_cuts,
         )
 
@@ -194,7 +183,6 @@ class YesNoAsrDataModule(DataModule):
                 input_strategy=OnTheFlyFeatures(
                     Fbank(FbankConfig(num_mel_bins=23))
                 ),
-                input_transforms=input_transforms,
                 return_cuts=self.args.return_cuts,
             )
 
diff --git a/egs/yesno/ASR/tdnn/decode.py b/egs/yesno/ASR/tdnn/decode.py
index a87219010..860ae3165 100755
--- a/egs/yesno/ASR/tdnn/decode.py
+++ b/egs/yesno/ASR/tdnn/decode.py
@@ -39,7 +39,7 @@ def get_parser():
     parser.add_argument(
         "--avg",
         type=int,
-        default=15,
+        default=4,
         help="Number of checkpoints to average. Automatically select "
         "consecutive checkpoints before the checkpoint specified by "
         "'--epoch'. ",
diff --git a/egs/yesno/ASR/tdnn/train.py b/egs/yesno/ASR/tdnn/train.py
index a5a248c9c..836dd2794 100755
--- a/egs/yesno/ASR/tdnn/train.py
+++ b/egs/yesno/ASR/tdnn/train.py
@@ -61,7 +61,7 @@ def get_parser():
     parser.add_argument(
         "--num-epochs",
         type=int,
-        default=50,
+        default=10,
         help="Number of epochs to train.",
     )
 
@@ -129,11 +129,10 @@ def get_params() -> AttributeDict:
         {
             "exp_dir": Path("tdnn/exp"),
             "lang_dir": Path("data/lang_phone"),
-            "lr": 1e-3,
+            "lr": 1e-1,
             "feature_dim": 23,
             "weight_decay": 1e-6,
             "start_epoch": 0,
-            "num_epochs": 50,
             "best_train_loss": float("inf"),
             "best_valid_loss": float("inf"),
             "best_train_epoch": -1,
@@ -491,7 +490,7 @@ def run(rank, world_size, args):
     if world_size > 1:
         model = DDP(model, device_ids=[rank])
 
-    optimizer = optim.AdamW(
+    optimizer = optim.SGD(
         model.parameters(),
         lr=params.lr,
         weight_decay=params.weight_decay,