mirror of https://github.com/k2-fsa/icefall.git (synced 2025-08-09 01:52:41 +00:00)

padding zeros (#591)

parent: 3b5846effa
commit: 923b60a7c6
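The change below threads the encoder layer's src_key_padding_mask into ConvolutionModule.forward and zeroes the padded frames before the depthwise convolution, so padding cannot leak into neighbouring valid frames through the convolution kernel. A minimal sketch of the idea in plain PyTorch (not the icefall classes; the toy shapes and the convention that True marks padded positions are assumptions read off the diff):

import torch
import torch.nn as nn

channels, kernel_size = 8, 31
# Depthwise 1-D convolution: groups == channels gives one filter per channel.
depthwise_conv = nn.Conv1d(
    channels,
    channels,
    kernel_size,
    padding=(kernel_size - 1) // 2,
    groups=channels,
)

batch, time = 2, 50
x = torch.randn(batch, channels, time)  # (batch, channels, time)

# True marks padded frames (the usual key-padding-mask convention).
lengths = torch.tensor([50, 30])
src_key_padding_mask = torch.arange(time)[None, :] >= lengths[:, None]  # (batch, time)

# The fix in this commit: zero the padded frames before the depthwise conv,
# so they cannot reach valid frames through the kernel's receptive field.
x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
y = depthwise_conv(x)  # (batch, channels, time)

The in-place masked_fill_ matches the diff; an out-of-place x = x.masked_fill(...) would behave the same at the cost of an extra allocation.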
@@ -248,7 +248,9 @@ class ConformerEncoderLayer(nn.Module):
         residual = src
         if self.normalize_before:
             src = self.norm_conv(src)
-        src = residual + self.dropout(self.conv_module(src))
+        src = residual + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )
         if not self.normalize_before:
             src = self.norm_conv(src)

@@ -879,11 +881,16 @@ class ConvolutionModule(nn.Module):
         )
         self.activation = Swish()

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -897,6 +904,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)
         x = self.activation(self.norm(x))
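The diff itself does not show where the mask comes from; it is normally derived from the per-utterance frame counts. A hypothetical stand-in helper (the name and signature below are illustrative only, not necessarily the utility icefall uses):

import torch

def make_pad_mask(lengths: torch.Tensor, max_len: int) -> torch.Tensor:
    """Return a (batch, max_len) bool mask with True at padded positions."""
    return torch.arange(max_len, device=lengths.device)[None, :] >= lengths[:, None]

lengths = torch.tensor([5, 3, 4])
print(make_pad_mask(lengths, max_len=5))
# tensor([[False, False, False, False, False],
#         [False, False, False,  True,  True],
#         [False, False, False, False,  True]])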
@@ -248,7 +248,9 @@ class ConformerEncoderLayer(nn.Module):
         residual = src
         if self.normalize_before:
             src = self.norm_conv(src)
-        src = residual + self.dropout(self.conv_module(src))
+        src = residual + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )
         if not self.normalize_before:
             src = self.norm_conv(src)

@@ -879,11 +881,16 @@ class ConvolutionModule(nn.Module):
         )
         self.activation = Swish()

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -897,6 +904,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)
         x = self.activation(self.norm(x))
@@ -246,7 +246,9 @@ class ConformerEncoderLayer(nn.Module):
         residual = src
         if self.normalize_before:
             src = self.norm_conv(src)
-        src = residual + self.dropout(self.conv_module(src))
+        src = residual + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )
         if not self.normalize_before:
             src = self.norm_conv(src)

@@ -877,11 +879,16 @@ class ConvolutionModule(nn.Module):
         )
         self.activation = Swish()

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -895,6 +902,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)
         # x is (batch, channels, time)
         x = x.permute(0, 2, 1)
@@ -253,7 +253,9 @@ class ConformerEncoderLayer(nn.Module):
         residual = src
         if self.normalize_before:
             src = self.norm_conv(src)
-        src = residual + self.dropout(self.conv_module(src))
+        src = residual + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )
         if not self.normalize_before:
             src = self.norm_conv(src)

@@ -890,11 +892,16 @@ class ConvolutionModule(nn.Module):
         )
         self.activation = Swish()

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -908,6 +915,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)
         if self.use_batchnorm:
             x = self.norm(x)
@@ -253,7 +253,9 @@ class ConformerEncoderLayer(nn.Module):
         residual = src
         if self.normalize_before:
             src = self.norm_conv(src)
-        src = residual + self.dropout(self.conv_module(src))
+        src = residual + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )
         if not self.normalize_before:
             src = self.norm_conv(src)

@@ -890,11 +892,16 @@ class ConvolutionModule(nn.Module):
        )
         self.activation = Swish()

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -908,6 +915,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)
         if self.use_batchnorm:
             x = self.norm(x)
@@ -268,7 +268,9 @@ class ConformerEncoderLayer(nn.Module):
         src = src + self.dropout(src_att)

         # convolution module
-        src = src + self.dropout(self.conv_module(src))
+        src = src + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )

         # feed forward module
         src = src + self.dropout(self.feed_forward(src))

@@ -921,11 +923,16 @@ class ConvolutionModule(nn.Module):
             initial_scale=0.25,
         )

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -941,6 +948,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)

         x = self.deriv_balancer2(x)
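A note on the unsqueeze(1).expand_as(x) in the added lines: by this point x has already been transposed to (batch, channels, time), while the mask is (batch, time), so it needs a channel dimension before it can be broadcast across x. A small shape check with toy sizes (assumed, for illustration only):

import torch

batch, channels, time = 2, 4, 6
x = torch.randn(batch, channels, time)

src_key_padding_mask = torch.zeros(batch, time, dtype=torch.bool)
src_key_padding_mask[:, 4:] = True  # the last two frames are padding

# (batch, time) -> (batch, 1, time) -> (batch, channels, time)
expanded = src_key_padding_mask.unsqueeze(1).expand_as(x)
x.masked_fill_(expanded, 0.0)
assert torch.all(x[:, :, 4:] == 0)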
@@ -247,7 +247,9 @@ class ConformerEncoderLayer(nn.Module):
         residual = src
         if self.normalize_before:
             src = self.norm_conv(src)
-        src = residual + self.dropout(self.conv_module(src))
+        src = residual + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )
         if not self.normalize_before:
             src = self.norm_conv(src)

@@ -878,11 +880,16 @@ class ConvolutionModule(nn.Module):
         )
         self.activation = Swish()

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -896,6 +903,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)
         x = self.activation(self.norm(x))
@@ -527,7 +527,9 @@ class ConformerEncoderLayer(nn.Module):
         src = src + self.dropout(src_att)

         # convolution module
-        conv, _ = self.conv_module(src)
+        conv, _ = self.conv_module(
+            src, src_key_padding_mask=src_key_padding_mask
+        )
         src = src + self.dropout(conv)

         # feed forward module

@@ -1457,6 +1459,7 @@ class ConvolutionModule(nn.Module):
         x: Tensor,
         cache: Optional[Tensor] = None,
         right_context: int = 0,
+        src_key_padding_mask: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
         """Compute convolution module.

@@ -1467,6 +1470,7 @@ class ConvolutionModule(nn.Module):
           right_context:
             How many future frames the attention can see in current chunk.
             Note: It's not that each individual frame has `right_context` frames
+          src_key_padding_mask: the mask for the src keys per batch (optional).
             of right context, some have more.

         Returns:

@@ -1486,6 +1490,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         if self.causal and self.lorder > 0:
             if cache is None:
                 # Make depthwise_conv causal by
@@ -527,7 +527,9 @@ class ConformerEncoderLayer(nn.Module):
         src = src + self.dropout(src_att)

         # convolution module
-        conv, _ = self.conv_module(src)
+        conv, _ = self.conv_module(
+            src, src_key_padding_mask=src_key_padding_mask
+        )
         src = src + self.dropout(conv)

         # feed forward module

@@ -1436,7 +1438,11 @@ class ConvolutionModule(nn.Module):
         )

     def forward(
-        self, x: Tensor, cache: Optional[Tensor] = None, right_context: int = 0
+        self,
+        x: Tensor,
+        cache: Optional[Tensor] = None,
+        right_context: int = 0,
+        src_key_padding_mask: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
         """Compute convolution module.

@@ -1448,6 +1454,7 @@ class ConvolutionModule(nn.Module):
             How many future frames the attention can see in current chunk.
             Note: It's not that each individual frame has `right_context` frames
             of right context, some have more.
+          src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
           Tensor: Output tensor (#time, batch, channels).

@@ -1466,6 +1473,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         if self.causal and self.lorder > 0:
             if cache is None:
                 # Make depthwise_conv causal by
@@ -264,7 +264,9 @@ class ConformerEncoderLayer(nn.Module):
         src = src + self.dropout(src_att)

         # convolution module
-        src = src + self.dropout(self.conv_module(src))
+        src = src + self.dropout(
+            self.conv_module(src, src_key_padding_mask=src_key_padding_mask)
+        )

         # feed forward module
         src = src + self.dropout(self.feed_forward(src))

@@ -927,11 +929,16 @@ class ConvolutionModule(nn.Module):
             initial_scale=0.25,
         )

-    def forward(self, x: Tensor) -> Tensor:
+    def forward(
+        self,
+        x: Tensor,
+        src_key_padding_mask: Optional[Tensor] = None,
+    ) -> Tensor:
         """Compute convolution module.

         Args:
             x: Input tensor (#time, batch, channels).
+            src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
             Tensor: Output tensor (#time, batch, channels).

@@ -947,6 +954,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         x = self.depthwise_conv(x)

         x = self.deriv_balancer2(x)
@@ -514,7 +514,9 @@ class ConformerEncoderLayer(nn.Module):
         if self.normalize_before:
             src = self.norm_conv(src)

-        src, _ = self.conv_module(src)
+        src, _ = self.conv_module(
+            src, src_key_padding_mask=src_key_padding_mask
+        )
         src = residual + self.dropout(src)

         if not self.normalize_before:

@@ -1383,11 +1385,18 @@ class ConvolutionModule(nn.Module):
         x: Tensor,
         cache: Optional[Tensor] = None,
         right_context: int = 0,
+        src_key_padding_mask: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
         """Compute convolution module.

         Args:
           x: Input tensor (#time, batch, channels).
+          cache: The cache of depthwise_conv, only used in real streaming
+            decoding.
+          right_context:
+            How many future frames the attention can see in current chunk.
+            Note: It's not that each individual frame has `right_context` frames
+          src_key_padding_mask: the mask for the src keys per batch (optional).

         Returns:
           Tensor: Output tensor (#time, batch, channels).

@@ -1401,6 +1410,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         if self.causal and self.lorder > 0:
             if cache is None:
                 # Make depthwise_conv causal by
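In the streaming variants the fill is applied before the causal branch (if self.causal and self.lorder > 0:), i.e. padded frames are zeroed before any left context or cache is prepended. A rough sketch of that ordering; that the causal branch left-pads by lorder frames when there is no cache is an assumption based on the truncated "# Make depthwise_conv causal by" comment:

import torch
import torch.nn.functional as F

x = torch.randn(2, 4, 6)                    # (batch, channels, time)
mask = torch.zeros(2, 6, dtype=torch.bool)
mask[:, 4:] = True                          # padded frames

# Zero the padding first ...
x.masked_fill_(mask.unsqueeze(1).expand_as(x), 0.0)

# ... then make the depthwise conv causal, assumed here to mean prepending
# lorder zero frames when no cache from a previous chunk is available.
lorder = 3
x = F.pad(x, (lorder, 0), value=0.0)        # (batch, channels, lorder + time)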
@@ -520,7 +520,9 @@ class ConformerEncoderLayer(nn.Module):
         src = src + self.dropout(src_att)

         # convolution module
-        conv, _ = self.conv_module(src)
+        conv, _ = self.conv_module(
+            src, src_key_padding_mask=src_key_padding_mask
+        )
         src = src + self.dropout(conv)

         # feed forward module

@@ -1392,6 +1394,7 @@ class ConvolutionModule(nn.Module):
         x: Tensor,
         cache: Optional[Tensor] = None,
         right_context: int = 0,
+        src_key_padding_mask: Optional[Tensor] = None,
     ) -> Tuple[Tensor, Tensor]:
         """Compute convolution module.
         Args:

@@ -1402,6 +1405,7 @@ class ConvolutionModule(nn.Module):
             How many future frames the attention can see in current chunk.
             Note: It's not that each individual frame has `right_context` frames
             of right context, some have more.
+          src_key_padding_mask: the mask for the src keys per batch (optional).
         Returns:
           If cache is None return the output tensor (#time, batch, channels).
           If cache is not None, return a tuple of Tensor, the first one is

@@ -1418,6 +1422,8 @@ class ConvolutionModule(nn.Module):
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

         # 1D Depthwise Conv
+        if src_key_padding_mask is not None:
+            x.masked_fill_(src_key_padding_mask.unsqueeze(1).expand_as(x), 0.0)
         if self.causal and self.lorder > 0:
             if cache is None:
                 # Make depthwise_conv causal by