Remove xscale from pos_embedding

Daniel Povey 2022-03-16 15:50:11 +08:00
parent 6561743d7b
commit c82db4184a
2 changed files with 3 additions and 5 deletions

View File

@@ -449,7 +449,7 @@ class ScaledLinear(nn.Linear):
        fan_in = self.weight.shape[1] * self.weight[0][0].numel()
        scale = fan_in ** -0.5  # 1/sqrt(fan_in)
        with torch.no_grad():
-            self.weight_scale += (torch.tensor(scale / 0.05).log() / self.scale_speed)
+            self.weight_scale += (torch.tensor(scale / std).log() / self.scale_speed)

    def get_weight(self):
        return self.weight * (self.weight_scale * self.scale_speed).exp()

@@ -485,7 +485,7 @@ class ScaledConv1d(nn.Conv1d):
        fan_in = self.weight.shape[1] * self.weight[0][0].numel()
        scale = fan_in ** -0.5  # 1/sqrt(fan_in)
        with torch.no_grad():
-            self.weight_scale += (torch.tensor(scale / 0.05).log() / self.scale_speed)
+            self.weight_scale += (torch.tensor(scale / std).log() / self.scale_speed)

    def get_weight(self):

@@ -527,7 +527,7 @@ class ScaledConv2d(nn.Conv2d):
        fan_in = self.weight.shape[1] * self.weight[0][0].numel()
        scale = fan_in ** -0.5  # 1/sqrt(fan_in)
        with torch.no_grad():
-            self.weight_scale += (torch.tensor(scale / 0.05).log() / self.scale_speed)
+            self.weight_scale += (torch.tensor(scale / std).log() / self.scale_speed)

    def get_weight(self):
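For context: in all three layers above, the hard-coded 0.05 is replaced by the initial weight standard deviation `std`, so that `weight_scale` is initialized to compensate for whatever std the weights were actually drawn with and the effective weight starts near the usual 1/sqrt(fan_in) scale. The following is a minimal sketch of that pattern, not the icefall code; the class name and the `std` / `scale_speed` constructor arguments and their defaults are illustrative assumptions:

import torch
import torch.nn as nn
import torch.nn.functional as F


class ScaledLinearSketch(nn.Linear):
    """Illustrative sketch of a Linear layer whose weight carries a learned
    log-scale.  `std` plays the role of the value that replaces the
    hard-coded 0.05 in the diff above."""

    def __init__(self, *args, std: float = 0.01, scale_speed: float = 5.0, **kwargs):
        super().__init__(*args, **kwargs)
        self.scale_speed = scale_speed
        self.weight_scale = nn.Parameter(torch.zeros(()))
        with torch.no_grad():
            # Draw the raw weight with the requested std, then set weight_scale
            # so the *effective* weight starts at the usual 1/sqrt(fan_in) scale.
            self.weight.normal_(0.0, std)
            fan_in = self.weight.shape[1] * self.weight[0][0].numel()
            scale = fan_in ** -0.5  # 1/sqrt(fan_in)
            self.weight_scale += torch.tensor(scale / std).log() / self.scale_speed

    def get_weight(self):
        return self.weight * (self.weight_scale * self.scale_speed).exp()

    def forward(self, x):
        # Bias handling is left unscaled to keep the sketch short.
        return F.linear(x, self.get_weight(), self.bias)

# E.g. ScaledLinearSketch(256, 512)(torch.randn(8, 256)): the effective weight
# std is ~1/sqrt(256) even though self.weight itself was drawn with std=0.01.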

View File

@@ -327,7 +327,6 @@ class RelPositionalEncoding(torch.nn.Module):
        """Construct an PositionalEncoding object."""
        super(RelPositionalEncoding, self).__init__()
        self.d_model = d_model
-        self.xscale = math.sqrt(self.d_model)
        self.dropout = torch.nn.Dropout(p=dropout_rate)
        self.pe = None
        self.extend_pe(torch.tensor(0.0).expand(1, max_len))

@@ -379,7 +378,6 @@ class RelPositionalEncoding(torch.nn.Module):
        """
        self.extend_pe(x)
-        x = x * self.xscale
        pos_emb = self.pe[
            :,
            self.pe.size(1) // 2
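With the two deletions above, the positional-encoding module no longer rescales its input by sqrt(d_model): forward() passes the input through (apart from dropout) and only extracts the relative positional-embedding slice. A rough, self-contained sketch of the resulting module follows, loosely modeled on the ESPnet-style RelPositionalEncoding this conformer code derives from; the extend_pe() details and the class name are assumptions, not a copy of the changed file:

import math
import torch


class RelPositionalEncodingSketch(torch.nn.Module):
    """Relative positional encoding with the `x * sqrt(d_model)` input
    scaling removed, as in the diff above.  Assumes d_model is even."""

    def __init__(self, d_model: int, dropout_rate: float, max_len: int = 5000):
        super().__init__()
        self.d_model = d_model
        # self.xscale = math.sqrt(self.d_model)   # removed by this commit
        self.dropout = torch.nn.Dropout(p=dropout_rate)
        self.pe = None
        self.extend_pe(torch.tensor(0.0).expand(1, max_len))

    def extend_pe(self, x):
        # Build sinusoidal encodings for positions +T-1 .. 0 .. -(T-1) and
        # concatenate them so self.pe has shape (1, 2*T-1, d_model) with
        # position 0 in the middle.
        T = x.size(1)
        if self.pe is not None and self.pe.size(1) >= 2 * T - 1:
            return
        position = torch.arange(0, T, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, self.d_model, 2, dtype=torch.float32)
            * -(math.log(10000.0) / self.d_model)
        )
        pe_positive = torch.zeros(T, self.d_model)
        pe_negative = torch.zeros(T, self.d_model)
        pe_positive[:, 0::2] = torch.sin(position * div_term)
        pe_positive[:, 1::2] = torch.cos(position * div_term)
        pe_negative[:, 0::2] = torch.sin(-position * div_term)
        pe_negative[:, 1::2] = torch.cos(-position * div_term)
        pe_positive = torch.flip(pe_positive, [0]).unsqueeze(0)  # +T-1 .. 0
        pe_negative = pe_negative[1:].unsqueeze(0)               # -1 .. -(T-1)
        self.pe = torch.cat([pe_positive, pe_negative], dim=1)

    def forward(self, x):
        self.extend_pe(x)
        # x = x * self.xscale   # removed: the input is no longer rescaled here
        center = self.pe.size(1) // 2
        pos_emb = self.pe[:, center - x.size(1) + 1 : center + x.size(1)]
        return self.dropout(x), self.dropout(pos_emb)

# Usage: x, pos_emb = RelPositionalEncodingSketch(256, 0.1)(torch.randn(4, 100, 256))
# gives x.shape == (4, 100, 256) and pos_emb.shape == (1, 199, 256).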