Remove initial_speed

Daniel Povey 2022-04-06 13:17:26 +08:00
parent a41e93437c
commit 61486a0f76
3 changed files with 0 additions and 17 deletions


@@ -956,30 +956,22 @@ class Conv2dSubsampling(nn.Module):
         assert in_channels >= 7
         super().__init__()
-        # This initial_speed is to slightly slow down the relative speed of
-        # training during the warmup phase by increasing the magnitude of the
-        # initial parameter values. The intention is to allow us to
-        # use a higher lr_factor.
-        initial_speed = 0.5
         self.conv = nn.Sequential(
             ScaledConv2d(
                 in_channels=1, out_channels=layer1_channels,
                 kernel_size=3, padding=1,
-                initial_speed=initial_speed,
             ),
             ActivationBalancer(channel_dim=1),
             DoubleSwish(),
             ScaledConv2d(
                 in_channels=layer1_channels, out_channels=layer2_channels,
                 kernel_size=3, stride=2,
-                initial_speed=initial_speed,
             ),
             ActivationBalancer(channel_dim=1),
             DoubleSwish(),
             ScaledConv2d(
                 in_channels=layer2_channels, out_channels=layer3_channels,
                 kernel_size=3, stride=2,
-                initial_speed=initial_speed,
             ),
             ActivationBalancer(channel_dim=1),
             DoubleSwish(),
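
The comments deleted above describe the mechanism being dropped: with initial_speed < 1, the scaled layers start from larger raw parameter values, so each optimizer step is a smaller relative change during warmup, which in turn tolerates a higher lr_factor. Below is a minimal sketch of that idea using a hypothetical ToyScaledLinear; it is an illustration only, not the actual ScaledConv2d / ScaledLinear implementation from this repository.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyScaledLinear(nn.Linear):
    """Illustrative sketch only, not the repo's ScaledLinear.

    initial_speed < 1.0 enlarges the raw weights at init; a learned
    log-scale shrinks them back, so the initial output distribution is
    unchanged, but each optimizer step then moves the raw weights by a
    smaller fraction of their magnitude ("slower relative speed").
    """

    def __init__(self, in_features: int, out_features: int,
                 initial_speed: float = 1.0):
        super().__init__(in_features, out_features)
        std = 0.1 / initial_speed          # e.g. initial_speed=0.5 -> std=0.2
        nn.init.normal_(self.weight, mean=0.0, std=std)
        nn.init.zeros_(self.bias)
        # exp(weight_scale) * std == 0.1 regardless of initial_speed.
        self.weight_scale = nn.Parameter(torch.tensor(0.1 / std).log())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return F.linear(x, self.weight * self.weight_scale.exp(), self.bias)
```

Removing initial_speed, as this commit does, simply leaves every scaled module with its default initialization, so warmup behaviour is no longer adjusted this way.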


@@ -56,16 +56,10 @@ class Decoder(nn.Module):
         """
         super().__init__()
-        # This initial_speed is to slightly slow down the relative speed of
-        # training during the warmup phase by increasing the magnitude of the
-        # initial parameter values. The intention is to allow us to
-        # use a higher lr_factor.
-        initial_speed = 0.5
         self.embedding = ScaledEmbedding(
             num_embeddings=vocab_size,
             embedding_dim=decoder_dim,
             padding_idx=blank_id,
-            initial_speed=initial_speed
         )
         self.blank_id = blank_id
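
The same removed comment covered the decoder's ScaledEmbedding. A toy numerical illustration of the warmup intuition (arbitrary values, not taken from the training code): for an update of a fixed size, the relative change of a tensor shrinks as its initial magnitude grows.

```python
import torch

torch.manual_seed(0)
for initial_speed in (1.0, 0.5):
    w = torch.randn(512, 512) * (0.1 / initial_speed)  # larger init when speed < 1
    step = 0.003 * torch.randn_like(w)                  # same-sized update either way
    print(f"initial_speed={initial_speed}: relative change "
          f"~ {(step.norm() / w.norm()).item():.3f}")
```

With initial_speed = 0.5 the parameters start roughly twice as large, so early updates move them about half as fast in relative terms; that is the behaviour this commit removes.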


@@ -27,9 +27,6 @@ class Joiner(nn.Module):
                  vocab_size: int):
         super().__init__()
-        # We don't bother giving the 'initial_speed' arg to the decoder
-        # submodules, because it does not affect the initial convergence of the
-        # system (only the simple joiner is involved in that).
         self.encoder_proj = ScaledLinear(encoder_dim, joiner_dim)
         self.decoder_proj = ScaledLinear(decoder_dim, joiner_dim)
         self.output_linear = ScaledLinear(joiner_dim, vocab_size)