minor fix for zipformer recipe (#758)

* minor fix

* add CI test
Author: Zengwei Yao (committed by GitHub), 2022-12-13 15:47:30 +08:00
Commit: 0470bbae66 (parent: b25c234c51)
4 changed files with 24 additions and 16 deletions

File 1 of 4: CI test workflow

@@ -113,6 +113,9 @@ jobs:
       cd ../pruned_transducer_stateless4
       pytest -v -s

+      cd ../pruned_transducer_stateless7
+      pytest -v -s
+
       cd ../transducer_stateless
       pytest -v -s
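For reference, the new step simply runs pytest inside the new recipe directory; pytest's default collection rules pick up test_model.py and its test_* functions, so no extra wiring is needed. A rough programmatic equivalent of the same step, assuming the repository's egs/librispeech/ASR layout:

    # Sketch only: the CI itself runs the shell commands in the hunk above.
    import os

    import pytest

    # Path assumed from the recipe layout referenced in this commit.
    os.chdir("egs/librispeech/ASR/pruned_transducer_stateless7")

    # -v prints each test name, -s keeps stdout uncaptured (same flags as the CI step).
    raise SystemExit(pytest.main(["-v", "-s"]))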

File 2 of 4: pruned_transducer_stateless7 export script

@@ -294,7 +294,6 @@ def main():
     if params.jit is True:
         convert_scaled_to_non_scaled(model, inplace=True)
-        logging.info("Using torch.jit.script()")
         # We won't use the forward() method of the model in C++, so just ignore
         # it here.
         # Otherwise, one of its arguments is a ragged tensor and is not
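This hunk sits in the TorchScript export path: the model's forward() takes a ragged tensor that TorchScript cannot compile, so forward() is excluded with torch.jit.ignore before torch.jit.script() is called, and only the other methods are exported. A minimal sketch of that pattern with a toy module (ToyTransducer and run_encoder are illustrative names, not part of the recipe):

    import torch
    import torch.nn as nn


    class ToyTransducer(nn.Module):
        # Toy stand-in for the recipe's transducer model.
        def __init__(self) -> None:
            super().__init__()
            self.encoder = nn.Linear(80, 4)

        def forward(self, x: torch.Tensor, labels) -> torch.Tensor:
            # `labels` stands in for the ragged tensor that is not torch scriptable.
            return self.encoder(x)

        @torch.jit.export
        def run_encoder(self, x: torch.Tensor) -> torch.Tensor:
            return self.encoder(x)


    model = ToyTransducer()
    # Same trick as in the diff: make torch.jit.script() skip forward() entirely.
    model.__class__.forward = torch.jit.ignore(model.__class__.forward)
    scripted = torch.jit.script(model)
    print(scripted.run_encoder(torch.randn(2, 80)).shape)  # torch.Size([2, 4])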

File 3 of 4: pruned_transducer_stateless7/test_model.py

@@ -20,19 +20,21 @@
 To run this file, do:

     cd icefall/egs/librispeech/ASR
-    python ./pruned_transducer_stateless4/test_model.py
+    python ./pruned_transducer_stateless7/test_model.py
 """

+import torch
+from scaling_converter import convert_scaled_to_non_scaled
 from train import get_params, get_transducer_model


-def test_model_1():
+def test_model():
     params = get_params()
     params.vocab_size = 500
     params.blank_id = 0
     params.context_size = 2
     params.num_encoder_layers = "2,4,3,2,4"
-    # params.feedforward_dims = "1024,1024,1536,1536,1024"
     params.feedforward_dims = "1024,1024,2048,2048,1024"
     params.nhead = "8,8,8,8,8"
     params.encoder_dims = "384,384,384,384,384"
@@ -47,9 +49,19 @@ def test_model_1():
     num_param = sum([p.numel() for p in model.parameters()])
     print(f"Number of model parameters: {num_param}")

+    # Test jit script
+    convert_scaled_to_non_scaled(model, inplace=True)
+    # We won't use the forward() method of the model in C++, so just ignore
+    # it here.
+    # Otherwise, one of its arguments is a ragged tensor and is not
+    # torch scriptabe.
+    model.__class__.forward = torch.jit.ignore(model.__class__.forward)
+    print("Using torch.jit.script")
+    model = torch.jit.script(model)
+

 def main():
-    test_model_1()
+    test_model()


 if __name__ == "__main__":

File 4 of 4: Zipformer model code (ZipformerEncoderLayer and RelPositionMultiheadAttention)

@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright (c) 2021 University of Chinese Academy of Sciences (author: Han Zhu)
+# Copyright 2022 Xiaomi Corp. (authors: Daniel Povey)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -454,7 +454,7 @@ class ZipformerEncoderLayer(nn.Module):
         # pooling module
         if torch.jit.is_scripting():
             src = src + self.pooling(src, key_padding_mask=src_key_padding_mask)
-        elif random.random() > dynamic_dropout:
+        elif random.random() >= dynamic_dropout:
             src = src + self.pooling(src, key_padding_mask=src_key_padding_mask)

         if torch.jit.is_scripting():
@@ -478,7 +478,7 @@ class ZipformerEncoderLayer(nn.Module):
                 src, src_key_padding_mask=src_key_padding_mask
             )
         else:
-            use_self_attn = random.random() > dynamic_dropout
+            use_self_attn = random.random() >= dynamic_dropout
             if use_self_attn:
                 src_att, attn_weights = self.self_attn(
                     src,
@@ -488,7 +488,7 @@ class ZipformerEncoderLayer(nn.Module):
                 )
                 src = src + src_att

-            if random.random() > dynamic_dropout:
+            if random.random() >= dynamic_dropout:
                 src = src + self.conv_module1(
                     src, src_key_padding_mask=src_key_padding_mask
                 )
@@ -497,7 +497,7 @@ class ZipformerEncoderLayer(nn.Module):
             if use_self_attn:
                 src = src + self.self_attn.forward2(src, attn_weights)

-            if random.random() > dynamic_dropout:
+            if random.random() >= dynamic_dropout:
                 src = src + self.conv_module2(
                     src, src_key_padding_mask=src_key_padding_mask
                 )
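The four `>` to `>=` changes above make each stochastic branch fire when random.random() >= dynamic_dropout, i.e. the submodule is applied with probability 1 - dynamic_dropout; because random.random() can return exactly 0.0, the `>=` form also guarantees the module is always applied when the drop rate is 0, matching the torch.jit.is_scripting() branch that applies it unconditionally. A toy sketch of this layer-skipping pattern (SkippableBlock is illustrative, not a class from the recipe):

    import random

    import torch
    import torch.nn as nn


    class SkippableBlock(nn.Module):
        # Toy residual block that is randomly skipped during training.
        def __init__(self, dim: int, dynamic_dropout: float = 0.1) -> None:
            super().__init__()
            self.ff = nn.Linear(dim, dim)
            self.dynamic_dropout = dynamic_dropout

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            if torch.jit.is_scripting() or not self.training:
                return x + self.ff(x)  # deterministic path: always applied
            if random.random() >= self.dynamic_dropout:  # applied with prob. 1 - rate
                x = x + self.ff(x)
            return x


    block = SkippableBlock(dim=16)
    print(block(torch.randn(4, 16)).shape)  # torch.Size([4, 16])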
@@ -1289,12 +1289,6 @@ class RelPositionMultiheadAttention(nn.Module):
             bsz * num_heads, seq_len, seq_len
         )

-        assert list(attn_output_weights.size()) == [
-            bsz * num_heads,
-            seq_len,
-            seq_len,
-        ]
-
         if attn_mask is not None:
             if attn_mask.dtype == torch.bool:
                 attn_output_weights.masked_fill_(attn_mask, float("-inf"))
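The deleted assert looks redundant rather than load-bearing: the context lines just above it appear to be the tail of a .view(bsz * num_heads, seq_len, seq_len) call, and .view() itself raises if the requested shape does not match the tensor's element count, so the follow-up shape check could never fire and only added per-step Python overhead. A small sketch of that reasoning:

    import torch

    bsz, num_heads, seq_len = 2, 4, 5
    w = torch.randn(bsz, num_heads, seq_len, seq_len)

    v = w.view(bsz * num_heads, seq_len, seq_len)
    assert list(v.size()) == [bsz * num_heads, seq_len, seq_len]  # trivially true

    try:
        w.view(bsz * num_heads, seq_len, seq_len + 1)  # wrong element count
    except RuntimeError as e:
        print("view() already fails on a shape mismatch:", e)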