diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/.conformer.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless5/.conformer.py.swp
index 871faa871..aca0a97e7 100644
Binary files a/egs/librispeech/ASR/pruned_transducer_stateless5/.conformer.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless5/.conformer.py.swp differ
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/convolution.py b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/convolution.py
new file mode 100644
index 000000000..6a5d2c30c
--- /dev/null
+++ b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/convolution.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Johns Hopkins University (Shinji Watanabe)
+#                Northwestern Polytechnical University (Pengcheng Guo)
+# Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+
+"""ConvolutionModule definition."""
+
+from torch import nn
+
+
+class ConvolutionModule(nn.Module):
+    """ConvolutionModule in Conformer model.
+
+    Args:
+        channels (int): The number of channels of conv layers.
+        kernel_size (int): Kernel size of conv layers.
+
+    """
+
+    def __init__(self, channels, kernel_size, activation=nn.ReLU(), bias=True):
+        """Construct a ConvolutionModule object."""
+        super(ConvolutionModule, self).__init__()
+        # kernel_size should be an odd number for 'SAME' padding
+        assert (kernel_size - 1) % 2 == 0
+
+        self.pointwise_conv1 = nn.Conv1d(
+            channels,
+            2 * channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias=bias,
+        )
+        self.depthwise_conv = nn.Conv1d(
+            channels,
+            channels,
+            kernel_size,
+            stride=1,
+            padding=(kernel_size - 1) // 2,
+            groups=channels,
+            bias=bias,
+        )
+        self.norm = nn.BatchNorm1d(channels)
+        self.pointwise_conv2 = nn.Conv1d(
+            channels,
+            channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias=bias,
+        )
+        self.activation = activation
+
+    def forward(self, x):
+        """Compute convolution module.
+
+        Args:
+            x (torch.Tensor): Input tensor (#batch, time, channels).
+
+        Returns:
+            torch.Tensor: Output tensor (#batch, time, channels).
+
+        """
+        # exchange the temporal dimension and the feature dimension
+        x = x.transpose(1, 2)
+
+        # GLU mechanism
+        x = self.pointwise_conv1(x)  # (batch, 2*channels, time)
+        x = nn.functional.glu(x, dim=1)  # (batch, channels, time)
+
+        # 1D Depthwise Conv
+        x = self.depthwise_conv(x)
+        x = self.activation(self.norm(x))
+
+        x = self.pointwise_conv2(x)
+
+        return x.transpose(1, 2)
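
The added module follows the standard Conformer convolution block: a pointwise conv that doubles the channels, a GLU that halves them back, a depthwise conv with 'SAME' padding, BatchNorm plus activation, and a final pointwise conv, so the output shape matches the input. Below is a minimal smoke-test sketch of that behavior; it assumes PyTorch is installed and that `convolution.py` is importable from the recipe directory, and the `channels=256` / `kernel_size=31` values are illustrative choices, not taken from this diff.

```python
# Illustrative smoke test for the ConvolutionModule added above.
# Assumptions: run from pruned_transducer_stateless_d2v_v2/ so that
# `convolution` is importable; channel/kernel sizes are placeholders.
import torch

from convolution import ConvolutionModule

module = ConvolutionModule(channels=256, kernel_size=31)
x = torch.randn(4, 100, 256)  # (batch, time, channels)
y = module(x)

# 'SAME' padding in the depthwise conv and the channel-halving GLU
# keep the output shape identical to the input shape.
assert y.shape == x.shape
```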