From 7961b6bf23b3d603343608995dcb6e0e070e867d Mon Sep 17 00:00:00 2001
From: yfyeung
Date: Wed, 10 Jul 2024 00:16:24 -0700
Subject: [PATCH] update

---
Reviewer notes (this area is ignored by `git am`; it is not part of the
commit message):

Summary of what the hunks below do:
  * decode.sh (new): exports CUDA_VISIBLE_DEVICES=2 and, for epoch 30, runs
    ./zipformer_lstm/decode.py with greedy_search once for every --avg value
    from 1 to epoch-1, reading from ./zipformer_lstm/exp_dropout0.2.
  * decode_single.sh (new): runs one beam_search decode; positional
    arguments are $1 -> CUDA_VISIBLE_DEVICES, $2 -> --epoch, $3 -> --avg.
    It reads from ./zipformer_lstm/exp.
  * sync.sh (new): runs `wandb sync` once on
    zipformer_lstm/exp_dropout0.2/tensorboard/, then repeats it with
    --append every 60 seconds in an endless loop.
  * beam_search.py, greedy_search: the device is now taken from the first
    model parameter instead of model.encoder_embed.device.
  * decoder.py, Decoder: the previously commented-out
    `self.embedding_dropout = nn.Dropout(embedding_dropout)` is enabled and
    applied to embedding_out before self.balancer.

NOTE(review):
  * The three scripts are created with mode 100755 but have no shebang
    line; decode.sh uses `{30..30}` and `for ((...))`, which are bash
    features. Confirm they are always run under bash, or add a shebang.
  * decode.sh and decode_single.sh point at different --exp-dir values
    (exp_dropout0.2 vs exp); confirm that this is intentional.
  * Presumably `embedding_dropout` is already a parameter of
    Decoder.__init__; that signature is not visible in these hunks, so
    verify before merging.
  * Leading indentation and the position of blank context lines in this
    copy were restored from a whitespace-collapsed source; verify against
    the original commit before applying.

 egs/librispeech/ASR/decode.sh                     | 12 ++++++++++++
 egs/librispeech/ASR/decode_single.sh              |  8 ++++++++
 egs/librispeech/ASR/sync.sh                       | 11 +++++++++++
 egs/librispeech/ASR/zipformer_lstm/beam_search.py |  2 +-
 egs/librispeech/ASR/zipformer_lstm/decoder.py     |  4 +++-
 5 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100755 egs/librispeech/ASR/decode.sh
 create mode 100755 egs/librispeech/ASR/decode_single.sh
 create mode 100755 egs/librispeech/ASR/sync.sh

diff --git a/egs/librispeech/ASR/decode.sh b/egs/librispeech/ASR/decode.sh
new file mode 100755
index 000000000..e67c0188d
--- /dev/null
+++ b/egs/librispeech/ASR/decode.sh
@@ -0,0 +1,12 @@
+export CUDA_VISIBLE_DEVICES=2
+
+for epoch in {30..30}; do
+  for ((avg=1; avg<=$epoch-1; avg++)); do
+    ./zipformer_lstm/decode.py \
+      --epoch $epoch \
+      --avg $avg \
+      --exp-dir ./zipformer_lstm/exp_dropout0.2 \
+      --max-duration 2000 \
+      --decoding-method greedy_search
+  done
+done
diff --git a/egs/librispeech/ASR/decode_single.sh b/egs/librispeech/ASR/decode_single.sh
new file mode 100755
index 000000000..9a340c3ff
--- /dev/null
+++ b/egs/librispeech/ASR/decode_single.sh
@@ -0,0 +1,8 @@
+export CUDA_VISIBLE_DEVICES=$1
+
+./zipformer_lstm/decode.py \
+  --epoch $2 \
+  --avg $3 \
+  --exp-dir ./zipformer_lstm/exp \
+  --max-duration 2000 \
+  --decoding-method beam_search
diff --git a/egs/librispeech/ASR/sync.sh b/egs/librispeech/ASR/sync.sh
new file mode 100755
index 000000000..63f946ebf
--- /dev/null
+++ b/egs/librispeech/ASR/sync.sh
@@ -0,0 +1,11 @@
+project=icefall-asr-librispeech-zipformer-2023-11-04
+run=4V10032G_lstm1_decoderdropout0.2_bpe500
+recipe=zipformer_lstm
+
+wandb sync ${recipe}/exp_dropout0.2/tensorboard/ --sync-tensorboard -p $project --id $run
+
+while true
+do
+  wandb sync ${recipe}/exp_dropout0.2/tensorboard/ --sync-tensorboard -p $project --id $run --append
+  sleep 60
+done
diff --git a/egs/librispeech/ASR/zipformer_lstm/beam_search.py b/egs/librispeech/ASR/zipformer_lstm/beam_search.py
index ae896274c..2c1e6781a 100644
--- a/egs/librispeech/ASR/zipformer_lstm/beam_search.py
+++ b/egs/librispeech/ASR/zipformer_lstm/beam_search.py
@@ -36,7 +36,7 @@ def greedy_search(model: nn.Module, encoder_out: torch.Tensor) -> List[int]:
     # support only batch_size == 1 for now
     assert encoder_out.size(0) == 1, encoder_out.size(0)
     blank_id = model.decoder.blank_id
-    device = model.encoder_embed.device
+    device = next(model.parameters()).device
 
     sos = torch.tensor([blank_id], device=device, dtype=torch.int64).reshape(1, 1)
     decoder_out, (h, c) = model.decoder(sos)
diff --git a/egs/librispeech/ASR/zipformer_lstm/decoder.py b/egs/librispeech/ASR/zipformer_lstm/decoder.py
index 51c1952b9..c603e84ee 100644
--- a/egs/librispeech/ASR/zipformer_lstm/decoder.py
+++ b/egs/librispeech/ASR/zipformer_lstm/decoder.py
@@ -76,7 +76,7 @@ class Decoder(nn.Module):
 
         self.vocab_size = vocab_size
 
-        # self.embedding_dropout = nn.Dropout(embedding_dropout)
+        self.embedding_dropout = nn.Dropout(embedding_dropout)
 
         self.rnn = nn.LSTM(
             input_size=decoder_dim,
@@ -113,6 +113,8 @@ class Decoder(nn.Module):
         # at utterance start, we use negative ids in beam_search.py
         embedding_out = self.embedding(y.clamp(min=0)) * (y >= 0).unsqueeze(-1)
 
+        embedding_out = self.embedding_dropout(embedding_out)
+
         embedding_out = self.balancer(embedding_out)
 
         rnn_out, (h, c) = self.rnn(embedding_out, states)