mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-10 10:32:17 +00:00
Make layer dropout rate 0.075, was 0.1.
This commit is contained in:
parent
45f872c27d
commit
e0ba4ef3ec
@ -40,6 +40,7 @@ class Conformer(EncoderInterface):
|
|||||||
dim_feedforward (int): feedforward dimension
|
dim_feedforward (int): feedforward dimension
|
||||||
num_encoder_layers (int): number of encoder layers
|
num_encoder_layers (int): number of encoder layers
|
||||||
dropout (float): dropout rate
|
dropout (float): dropout rate
|
||||||
|
layer_dropout (float): layer-dropout rate.
|
||||||
cnn_module_kernel (int): Kernel size of convolution module
|
cnn_module_kernel (int): Kernel size of convolution module
|
||||||
vgg_frontend (bool): whether to use vgg frontend.
|
vgg_frontend (bool): whether to use vgg frontend.
|
||||||
"""
|
"""
|
||||||
@ -53,6 +54,7 @@ class Conformer(EncoderInterface):
|
|||||||
dim_feedforward: int = 2048,
|
dim_feedforward: int = 2048,
|
||||||
num_encoder_layers: int = 12,
|
num_encoder_layers: int = 12,
|
||||||
dropout: float = 0.1,
|
dropout: float = 0.1,
|
||||||
|
layer_dropout: float = 0.075,
|
||||||
cnn_module_kernel: int = 31,
|
cnn_module_kernel: int = 31,
|
||||||
) -> None:
|
) -> None:
|
||||||
super(Conformer, self).__init__()
|
super(Conformer, self).__init__()
|
||||||
@ -76,6 +78,7 @@ class Conformer(EncoderInterface):
|
|||||||
nhead,
|
nhead,
|
||||||
dim_feedforward,
|
dim_feedforward,
|
||||||
dropout,
|
dropout,
|
||||||
|
layer_dropout,
|
||||||
cnn_module_kernel,
|
cnn_module_kernel,
|
||||||
)
|
)
|
||||||
self.encoder = ConformerEncoder(encoder_layer, num_encoder_layers)
|
self.encoder = ConformerEncoder(encoder_layer, num_encoder_layers)
|
||||||
@ -149,9 +152,13 @@ class ConformerEncoderLayer(nn.Module):
|
|||||||
nhead: int,
|
nhead: int,
|
||||||
dim_feedforward: int = 2048,
|
dim_feedforward: int = 2048,
|
||||||
dropout: float = 0.1,
|
dropout: float = 0.1,
|
||||||
|
layer_dropout: float = 0.075,
|
||||||
cnn_module_kernel: int = 31,
|
cnn_module_kernel: int = 31,
|
||||||
) -> None:
|
) -> None:
|
||||||
super(ConformerEncoderLayer, self).__init__()
|
super(ConformerEncoderLayer, self).__init__()
|
||||||
|
|
||||||
|
self.layer_dropout = layer_dropout
|
||||||
|
|
||||||
self.d_model = d_model
|
self.d_model = d_model
|
||||||
|
|
||||||
self.self_attn = RelPositionMultiheadAttention(
|
self.self_attn = RelPositionMultiheadAttention(
|
||||||
@ -217,10 +224,10 @@ class ConformerEncoderLayer(nn.Module):
|
|||||||
src_orig = src
|
src_orig = src
|
||||||
|
|
||||||
warmup_scale = min(0.1 + warmup, 1.0)
|
warmup_scale = min(0.1 + warmup, 1.0)
|
||||||
# alpha = 1.0 means fully use this encoder layer, 0.0 would mean completely
|
# alpha = 1.0 means fully use this encoder layer, 0.0 would mean
|
||||||
# bypass it.
|
# completely bypass it.
|
||||||
if self.training:
|
if self.training:
|
||||||
alpha = warmup_scale if torch.rand(()).item() <= 0.9 else 0.1
|
alpha = warmup_scale if torch.rand(()).item() <= (1.0 - self.layer_dropout) else 0.1
|
||||||
else:
|
else:
|
||||||
alpha = 1.0
|
alpha = 1.0
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user