Use dropout in attention, on attn weights.

Daniel Povey 2022-09-22 19:18:50 +08:00
parent 24aea947d2
commit ce3f59d9c7


@@ -168,7 +168,7 @@ class ConformerEncoderLayer(nn.Module):
         self.d_model = d_model
         self.self_attn = RelPositionMultiheadAttention(
-            d_model, nhead, dropout=0.0
+            d_model, nhead, dropout=dropout,
         )
         self.feed_forward = nn.Sequential(
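
For context, "dropout on attn weights" refers to applying dropout to the softmaxed attention probabilities before they are multiplied by the values, which is what passing dropout=dropout into the attention module enables. The sketch below is a minimal, hypothetical illustration of that pattern only; the repo's actual RelPositionMultiheadAttention also implements relative positional encoding and other details omitted here.

import torch
import torch.nn as nn
import torch.nn.functional as F


class SimpleSelfAttention(nn.Module):
    """Illustrative self-attention with dropout applied to the
    attention weights (not to the output), mirroring the effect of
    the dropout=dropout argument in the diff above."""

    def __init__(self, d_model: int, nhead: int, dropout: float = 0.0):
        super().__init__()
        assert d_model % nhead == 0
        self.nhead = nhead
        self.head_dim = d_model // nhead
        self.in_proj = nn.Linear(d_model, 3 * d_model)
        self.out_proj = nn.Linear(d_model, d_model)
        # Dropout module for the softmaxed attention weights.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, seq_len, d_model)
        B, T, _ = x.shape
        q, k, v = self.in_proj(x).chunk(3, dim=-1)

        def split_heads(t: torch.Tensor) -> torch.Tensor:
            # (batch, seq_len, d_model) -> (batch, nhead, seq_len, head_dim)
            return t.view(B, T, self.nhead, self.head_dim).transpose(1, 2)

        q, k, v = split_heads(q), split_heads(k), split_heads(v)
        scores = torch.matmul(q, k.transpose(-2, -1)) / self.head_dim ** 0.5
        attn = F.softmax(scores, dim=-1)
        # Dropout on the attention weights themselves.
        attn = self.dropout(attn)
        out = torch.matmul(attn, v)
        out = out.transpose(1, 2).contiguous().view(B, T, -1)
        return self.out_proj(out)

During training this randomly zeroes individual attention weights, so each query attends to a randomly thinned set of keys; at eval time (model.eval()) the dropout is a no-op.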