mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 17:42:21 +00:00
Refactor conformer. (#237)
This commit is contained in:
parent
ad62981765
commit
1603744469
@ -15,7 +15,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import copy
|
||||||
import math
|
import math
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
@ -264,13 +264,12 @@ class ConformerEncoderLayer(nn.Module):
|
|||||||
return src
|
return src
|
||||||
|
|
||||||
|
|
||||||
class ConformerEncoder(nn.TransformerEncoder):
|
class ConformerEncoder(nn.Module):
|
||||||
r"""ConformerEncoder is a stack of N encoder layers
|
r"""ConformerEncoder is a stack of N encoder layers
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
encoder_layer: an instance of the ConformerEncoderLayer() class (required).
|
encoder_layer: an instance of the ConformerEncoderLayer() class (required).
|
||||||
num_layers: the number of sub-encoder-layers in the encoder (required).
|
num_layers: the number of sub-encoder-layers in the encoder (required).
|
||||||
norm: the layer normalization component (optional).
|
|
||||||
|
|
||||||
Examples::
|
Examples::
|
||||||
>>> encoder_layer = ConformerEncoderLayer(d_model=512, nhead=8)
|
>>> encoder_layer = ConformerEncoderLayer(d_model=512, nhead=8)
|
||||||
@ -280,12 +279,12 @@ class ConformerEncoder(nn.TransformerEncoder):
|
|||||||
>>> out = conformer_encoder(src, pos_emb)
|
>>> out = conformer_encoder(src, pos_emb)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, encoder_layer: nn.Module, num_layers: int) -> None:
|
||||||
self, encoder_layer: nn.Module, num_layers: int, norm: nn.Module = None
|
super().__init__()
|
||||||
) -> None:
|
self.layers = nn.ModuleList(
|
||||||
super(ConformerEncoder, self).__init__(
|
[copy.deepcopy(encoder_layer) for i in range(num_layers)]
|
||||||
encoder_layer=encoder_layer, num_layers=num_layers, norm=norm
|
|
||||||
)
|
)
|
||||||
|
self.num_layers = num_layers
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
@ -320,9 +319,6 @@ class ConformerEncoder(nn.TransformerEncoder):
|
|||||||
src_key_padding_mask=src_key_padding_mask,
|
src_key_padding_mask=src_key_padding_mask,
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.norm is not None:
|
|
||||||
output = self.norm(output)
|
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
@ -643,6 +639,7 @@ class RelPositionMultiheadAttention(nn.Module):
|
|||||||
if _b is not None:
|
if _b is not None:
|
||||||
_b = _b[_start:_end]
|
_b = _b[_start:_end]
|
||||||
q = nn.functional.linear(query, _w, _b)
|
q = nn.functional.linear(query, _w, _b)
|
||||||
|
|
||||||
# This is inline in_proj function with in_proj_weight and in_proj_bias
|
# This is inline in_proj function with in_proj_weight and in_proj_bias
|
||||||
_b = in_proj_bias
|
_b = in_proj_bias
|
||||||
_start = embed_dim
|
_start = embed_dim
|
||||||
|
51
egs/librispeech/ASR/transducer_stateless/test_conformer.py
Executable file
51
egs/librispeech/ASR/transducer_stateless/test_conformer.py
Executable file
@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright 2022 Xiaomi Corp. (authors: Daniel Povey
|
||||||
|
# Fangjun Kuang)
|
||||||
|
#
|
||||||
|
# See ../../../../LICENSE for clarification regarding multiple authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""
|
||||||
|
To run this file, do:
|
||||||
|
|
||||||
|
cd icefall/egs/librispeech/ASR
|
||||||
|
python ./transducer_stateless/test_conformer.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from conformer import Conformer
|
||||||
|
|
||||||
|
|
||||||
|
def test_conformer():
    """Smoke-test ``Conformer`` by running one forward pass.

    A small model is built and called with a random input batch plus a
    length vector in which every entry equals the full sequence length.
    The shapes of the two returned values are printed; nothing is
    asserted, so the only check is that the call completes.
    """
    num_features = 50
    model = Conformer(
        num_features=num_features,
        output_dim=256,
        d_model=128,
        nhead=4,
    )

    n_utts, n_frames = 5, 20
    # Random input of shape (n_utts, n_frames, num_features) and a matching
    # int64 vector holding n_frames for every batch entry.
    features = torch.randn(n_utts, n_frames, num_features)
    feature_lens = torch.full((n_utts,), n_frames, dtype=torch.int64)

    # Just make sure the forward pass runs.
    logits, lengths = model(features, feature_lens)
    for result in (logits, lengths):
        print(result.shape)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Script entry point: run the conformer smoke test."""
    test_conformer()
|
||||||
|
|
||||||
|
|
||||||
|
# Only run the smoke test when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
|
Loading…
x
Reference in New Issue
Block a user