Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-08-09 18:12:19 +00:00)
Remove initial_speed
This commit is contained in:
parent a41e93437c
commit 61486a0f76
@ -956,30 +956,22 @@ class Conv2dSubsampling(nn.Module):
|
||||
assert in_channels >= 7
|
||||
super().__init__()
|
||||
|
||||
# This initial_speed is to slightly slow down the relative speed of
|
||||
# training during the warmup phase by increasing the magnitude of the
|
||||
# initial parameter values. The intention is to allow us to
|
||||
# use a higher lr_factor.
|
||||
initial_speed = 0.5
|
||||
self.conv = nn.Sequential(
|
||||
ScaledConv2d(
|
||||
in_channels=1, out_channels=layer1_channels,
|
||||
kernel_size=3, padding=1,
|
||||
initial_speed=initial_speed,
|
||||
),
|
||||
ActivationBalancer(channel_dim=1),
|
||||
DoubleSwish(),
|
||||
ScaledConv2d(
|
||||
in_channels=layer1_channels, out_channels=layer2_channels,
|
||||
kernel_size=3, stride=2,
|
||||
initial_speed=initial_speed,
|
||||
),
|
||||
ActivationBalancer(channel_dim=1),
|
||||
DoubleSwish(),
|
||||
ScaledConv2d(
|
||||
in_channels=layer2_channels, out_channels=layer3_channels,
|
||||
kernel_size=3, stride=2,
|
||||
initial_speed=initial_speed,
|
||||
),
|
||||
ActivationBalancer(channel_dim=1),
|
||||
DoubleSwish(),
|
||||
|
@ -56,16 +56,10 @@ class Decoder(nn.Module):
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
# This initial_speed is to slightly slow down the relative speed of
|
||||
# training during the warmup phase by increasing the magnitude of the
|
||||
# initial parameter values. The intention is to allow us to
|
||||
# use a higher lr_factor.
|
||||
initial_speed = 0.5
|
||||
self.embedding = ScaledEmbedding(
|
||||
num_embeddings=vocab_size,
|
||||
embedding_dim=decoder_dim,
|
||||
padding_idx=blank_id,
|
||||
initial_speed=initial_speed
|
||||
)
|
||||
self.blank_id = blank_id
|
||||
|
||||
|
@ -27,9 +27,6 @@ class Joiner(nn.Module):
|
||||
vocab_size: int):
|
||||
super().__init__()
|
||||
|
||||
# We don't bother giving the 'initial_speed' arg to the decoder
|
||||
# submodules, because it does not affect the initial convergence of the
|
||||
# system (only the simple joiner is involved in that).
|
||||
self.encoder_proj = ScaledLinear(encoder_dim, joiner_dim)
|
||||
self.decoder_proj = ScaledLinear(decoder_dim, joiner_dim)
|
||||
self.output_linear = ScaledLinear(joiner_dim, vocab_size)
|
||||
|
Loading…
x
Reference in New Issue
Block a user