diff --git a/egs/librispeech/ASR/transducer/test_rnn.py b/egs/librispeech/ASR/transducer/test_rnn.py
index 74c94cc70..d8effb996 100755
--- a/egs/librispeech/ASR/transducer/test_rnn.py
+++ b/egs/librispeech/ASR/transducer/test_rnn.py
@@ -432,11 +432,11 @@ def test_layernorm_lstm_forward(device="cpu"):
 
 
 def test_layernorm_lstm_with_projection_forward(device="cpu"):
-    input_size = torch.randint(low=2, high=100, size=(1,)).item()
-    hidden_size = torch.randint(low=10, high=100, size=(1,)).item()
-    proj_size = torch.randint(low=2, high=hidden_size, size=(1,)).item()
-    num_layers = torch.randint(low=2, high=100, size=(1,)).item()
-    bias = torch.randint(low=0, high=1000, size=(1,)).item() & 2 == 0
+    input_size = 40  # torch.randint(low=2, high=100, size=(1,)).item()
+    hidden_size = 40  # torch.randint(low=10, high=100, size=(1,)).item()
+    proj_size = 20  # torch.randint(low=2, high=hidden_size, size=(1,)).item()
+    num_layers = 12  # torch.randint(low=2, high=100, size=(1,)).item()
+    bias = True  # torch.randint(low=0, high=1000, size=(1,)).item() & 2 == 0
 
     self_lstm = LayerNormLSTM(
         input_size=input_size,
diff --git a/icefall/utils.py b/icefall/utils.py
index 2358ed02f..5d86472b5 100644
--- a/icefall/utils.py
+++ b/icefall/utils.py
@@ -1095,10 +1095,10 @@ def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor:
     assert lengths.ndim == 1, lengths.ndim
     max_len = max(max_len, lengths.max())
     n = lengths.size(0)
+    seq_range = torch.arange(0, max_len, device=lengths.device)
+    expaned_lengths = seq_range.unsqueeze(0).expand(n, max_len)
 
-    expaned_lengths = torch.arange(max_len).expand(n, max_len).to(lengths)
-
-    return expaned_lengths >= lengths.unsqueeze(1)
+    return expaned_lengths >= lengths.unsqueeze(-1)
 
 
 # Copied and modified from https://github.com/wenet-e2e/wenet/blob/main/wenet/utils/mask.py
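
A quick sanity check of the updated make_pad_mask (a minimal sketch, not part of the diff above; it assumes the function is imported from icefall.utils): positions at or beyond each sequence's length come back as True, and because torch.arange is now created with device=lengths.device, the returned mask stays on the same device as lengths.

    import torch

    from icefall.utils import make_pad_mask

    lengths = torch.tensor([1, 3, 2, 5])
    mask = make_pad_mask(lengths)
    # True marks padded positions, False marks valid frames, e.g.
    # tensor([[False,  True,  True,  True,  True],
    #         [False, False, False,  True,  True],
    #         [False, False,  True,  True,  True],
    #         [False, False, False, False, False]])
    print(mask)
    assert mask.device == lengths.device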