From 2e37b29e66f52813707250f754224a447fe4ac10 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 23 Aug 2021 13:57:46 +0800 Subject: [PATCH] Disable SpecAug for yesno. Also replace Adam with SGD. --- .github/workflows/run-yesno-recipe.yml | 18 +++++------------- egs/yesno/ASR/tdnn/asr_datamodule.py | 12 ------------ egs/yesno/ASR/tdnn/decode.py | 2 +- egs/yesno/ASR/tdnn/train.py | 7 +++---- 4 files changed, 9 insertions(+), 30 deletions(-) diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/run-yesno-recipe.yml index 7e36139a3..2c9b59aba 100644 --- a/.github/workflows/run-yesno-recipe.yml +++ b/.github/workflows/run-yesno-recipe.yml @@ -69,21 +69,13 @@ jobs: run: | export PYTHONPATH=$PWD:$PYTHONPATH echo $PYTHONPATH - ls -lh - # The following three lines are for macOS - lib_path=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") - echo "lib_path: $lib_path" - export DYLD_LIBRARY_PATH=$lib_path:$DYLD_LIBRARY_PATH - ls -lh $lib_path cd egs/yesno/ASR ./prepare.sh - python3 ./tdnn/train.py --num-epochs 100 - python3 ./tdnn/decode.py --epoch 99 - python3 ./tdnn/decode.py --epoch 95 - python3 ./tdnn/decode.py --epoch 90 - python3 ./tdnn/decode.py --epoch 80 - python3 ./tdnn/decode.py --epoch 70 - python3 ./tdnn/decode.py --epoch 60 + python3 ./tdnn/train.py + python3 ./tdnn/decode.py --avg 2 + python3 ./tdnn/decode.py --avg 3 + python3 ./tdnn/decode.py --avg 4 + python3 ./tdnn/decode.py --avg 5 # TODO: Check that the WER is less than some value diff --git a/egs/yesno/ASR/tdnn/asr_datamodule.py b/egs/yesno/ASR/tdnn/asr_datamodule.py index 8b2b44c8a..e6614e3ce 100644 --- a/egs/yesno/ASR/tdnn/asr_datamodule.py +++ b/egs/yesno/ASR/tdnn/asr_datamodule.py @@ -27,7 +27,6 @@ from lhotse.dataset import ( K2SpeechRecognitionDataset, PrecomputedFeatures, SingleCutSampler, - SpecAugment, ) from lhotse.dataset.input_strategies import OnTheFlyFeatures from torch.utils.data import DataLoader @@ -163,18 +162,8 @@ class YesNoAsrDataModule(DataModule): ) ] + transforms - input_transforms = [ - SpecAugment( - num_frame_masks=2, - features_mask_size=27, - num_feature_masks=2, - frames_mask_size=100, - ) - ] - train = K2SpeechRecognitionDataset( cut_transforms=transforms, - input_transforms=input_transforms, return_cuts=self.args.return_cuts, ) @@ -194,7 +183,6 @@ class YesNoAsrDataModule(DataModule): input_strategy=OnTheFlyFeatures( Fbank(FbankConfig(num_mel_bins=23)) ), - input_transforms=input_transforms, return_cuts=self.args.return_cuts, ) diff --git a/egs/yesno/ASR/tdnn/decode.py b/egs/yesno/ASR/tdnn/decode.py index a87219010..860ae3165 100755 --- a/egs/yesno/ASR/tdnn/decode.py +++ b/egs/yesno/ASR/tdnn/decode.py @@ -39,7 +39,7 @@ def get_parser(): parser.add_argument( "--avg", type=int, - default=15, + default=4, help="Number of checkpoints to average. Automatically select " "consecutive checkpoints before the checkpoint specified by " "'--epoch'. ", diff --git a/egs/yesno/ASR/tdnn/train.py b/egs/yesno/ASR/tdnn/train.py index a5a248c9c..836dd2794 100755 --- a/egs/yesno/ASR/tdnn/train.py +++ b/egs/yesno/ASR/tdnn/train.py @@ -61,7 +61,7 @@ def get_parser(): parser.add_argument( "--num-epochs", type=int, - default=50, + default=10, help="Number of epochs to train.", ) @@ -129,11 +129,10 @@ def get_params() -> AttributeDict: { "exp_dir": Path("tdnn/exp"), "lang_dir": Path("data/lang_phone"), - "lr": 1e-3, + "lr": 1e-1, "feature_dim": 23, "weight_decay": 1e-6, "start_epoch": 0, - "num_epochs": 50, "best_train_loss": float("inf"), "best_valid_loss": float("inf"), "best_train_epoch": -1, @@ -491,7 +490,7 @@ def run(rank, world_size, args): if world_size > 1: model = DDP(model, device_ids=[rank]) - optimizer = optim.AdamW( + optimizer = optim.SGD( model.parameters(), lr=params.lr, weight_decay=params.weight_decay,