Use dropout in attention, on attn weights.

Daniel Povey 2022-09-22 19:18:50 +08:00
parent 24aea947d2
commit ce3f59d9c7


@@ -168,7 +168,7 @@ class ConformerEncoderLayer(nn.Module):
         self.d_model = d_model
         self.self_attn = RelPositionMultiheadAttention(
-            d_model, nhead, dropout=0.0
+            d_model, nhead, dropout=dropout,
         )
         self.feed_forward = nn.Sequential(
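
For context, "dropout on attn weights" refers to applying dropout to the softmaxed attention probabilities before they are multiplied by the values, which is what passing dropout=dropout into the attention module enables. The sketch below is a minimal, hypothetical illustration of that pattern only; the repo's actual RelPositionMultiheadAttention also implements relative positional encoding and other details omitted here.

import torch
import torch.nn as nn
import torch.nn.functional as F


class SimpleSelfAttention(nn.Module):
    """Illustrative self-attention with dropout applied to the
    attention weights (not to the output), mirroring the effect of
    the dropout=dropout argument in the diff above."""

    def __init__(self, d_model: int, nhead: int, dropout: float = 0.0):
        super().__init__()
        assert d_model % nhead == 0
        self.nhead = nhead
        self.head_dim = d_model // nhead
        self.in_proj = nn.Linear(d_model, 3 * d_model)
        self.out_proj = nn.Linear(d_model, d_model)
        # Dropout module for the softmaxed attention weights.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, seq_len, d_model)
        B, T, _ = x.shape
        q, k, v = self.in_proj(x).chunk(3, dim=-1)

        def split_heads(t: torch.Tensor) -> torch.Tensor:
            # (batch, seq_len, d_model) -> (batch, nhead, seq_len, head_dim)
            return t.view(B, T, self.nhead, self.head_dim).transpose(1, 2)

        q, k, v = split_heads(q), split_heads(k), split_heads(v)
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.head_dim ** 0.5
        attn = F.softmax(scores, dim=-1)
        # Dropout on the attention weights themselves.
        attn = self.dropout(attn)
        out = torch.matmul(attn, v)
        out = out.transpose(1, 2).contiguous().view(B, T, -1)
        return self.out_proj(out)

During training this randomly zeroes individual attention weights, so each query attends to a randomly thinned set of keys; at eval time (model.eval()) the dropout is a no-op.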