Use dropout in attention, on attn weights.

Daniel Povey 2022-09-22 19:18:50 +08:00
parent 24aea947d2
commit ce3f59d9c7


@@ -168,7 +168,7 @@ class ConformerEncoderLayer(nn.Module):
         self.d_model = d_model
         self.self_attn = RelPositionMultiheadAttention(
-            d_model, nhead, dropout=0.0
+            d_model, nhead, dropout=dropout,
         )
         self.feed_forward = nn.Sequential(
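
The functional change is that the encoder layer's dropout rate is now forwarded to RelPositionMultiheadAttention instead of being hard-coded to 0.0. In multi-head attention, this kind of dropout is conventionally applied to the post-softmax attention weights before they are used to combine the value vectors. The sketch below illustrates that placement only; it is not the icefall RelPositionMultiheadAttention implementation, and the function name and shapes are hypothetical.

import torch
import torch.nn.functional as F


def attention_with_weight_dropout(q, k, v, dropout_p=0.1, training=True):
    # q, k, v: (batch, num_heads, seq_len, head_dim)
    d_k = q.size(-1)
    scores = torch.matmul(q, k.transpose(-2, -1)) / d_k**0.5
    attn_weights = F.softmax(scores, dim=-1)
    # Dropout on the attention weights themselves: randomly zeroes some
    # query-key connections during training (this is what a nonzero
    # `dropout` argument enables).
    attn_weights = F.dropout(attn_weights, p=dropout_p, training=training)
    return torch.matmul(attn_weights, v), attn_weights


# Example: batch=2, heads=4, seq_len=10, head_dim=16
q = k = v = torch.randn(2, 4, 10, 16)
out, w = attention_with_weight_dropout(q, k, v, dropout_p=0.1)
print(out.shape)  # torch.Size([2, 4, 10, 16])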