Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-09-09 00:54:18 +00:00)

Commit 96a544fb69 (parent 6f7860a0a6)

Various fixes to support torch script.
@@ -116,8 +116,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -159,6 +157,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptabe.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
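This hunk (and the identical ones in the other export scripts below) replaces the old "torchscript not supported" assertion with a working export path: forward() is excluded from compilation because one of its arguments is a ragged tensor, the rest of the model is compiled with torch.jit.script, and the result is saved as cpu_jit.pt. Below is a minimal, self-contained sketch of the same pattern; ToyEncoder and ToyModel are made-up stand-ins for the real transducer model, not code from this commit.

# Illustrative sketch only (ToyEncoder/ToyModel are hypothetical stand-ins,
# not part of this commit): ignore an un-scriptable forward(), script the
# rest of the module, and save it for later use from C++ or Python.
import torch
import torch.nn as nn


class ToyEncoder(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.proj = nn.Linear(80, 16)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(x)


class ToyModel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.encoder = ToyEncoder()

    def forward(self, x, y):
        # Stand-in for a forward() that takes a ragged tensor and
        # therefore cannot be compiled by TorchScript.
        raise NotImplementedError


model = ToyModel()
model.eval()

# Same trick as in the diff: keep forward() out of compilation.
model.__class__.forward = torch.jit.ignore(model.__class__.forward)
scripted = torch.jit.script(model)
scripted.save("cpu_jit.pt")

# The scripted sub-modules remain individually callable.
print(scripted.encoder(torch.rand(2, 80)).shape)  # torch.Size([2, 16])

Because nothing in the compiled graph calls the ignored forward(), the scripted module can still be saved, and its sub-modules can be invoked individually, which is what the C++ deployment mentioned in the comment relies on.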
@@ -112,10 +112,13 @@ class Conformer(EncoderInterface):
         x, pos_emb = self.encoder_pos(x)
         x = x.permute(1, 0, 2)  # (N, T, C) -> (T, N, C)
 
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            # Caution: We assume the subsampling factor is 4!
-            lengths = ((x_lens - 1) // 2 - 1) // 2
+        # Caution: We assume the subsampling factor is 4!
+        #
+        # lengths = ((x_lens - 1) // 2 - 1) // 2 # issue an warning
+        #
+        # Note: rounding_mode in torch.div() is available only in torch >= 1.8.0
+        lengths = (((x_lens - 1) >> 1) - 1) >> 1
+
         assert x.size(0) == lengths.max().item()
         mask = make_pad_mask(lengths)
 
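The rewrite above trades the floor-division formula, which had to be wrapped in warnings.catch_warnings to silence a deprecation warning, for a bit-shift version, since torch.div's rounding_mode argument only exists in torch >= 1.8.0. For the non-negative lengths that occur with a subsampling factor of 4, the two are numerically identical; a quick sanity check (not part of the commit):

# Sanity check (not from the commit): the shift-based formula used after
# this change matches the old floor-division formula for realistic,
# non-negative frame counts.
import torch

x_lens = torch.arange(7, 5000, dtype=torch.int64)  # plausible frame counts

old_lengths = ((x_lens - 1) // 2 - 1) // 2
new_lengths = (((x_lens - 1) >> 1) - 1) >> 1

assert torch.equal(old_lengths, new_lengths)
print("formulas agree for lengths 7..4999")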
@@ -131,8 +131,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -191,6 +189,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptabe.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
@@ -212,7 +212,10 @@ class ScaledLinear(nn.Linear):
         return self.weight * self.weight_scale.exp()
 
     def get_bias(self):
-        return None if self.bias is None else self.bias * self.bias_scale.exp()
+        if self.bias is None or self.bias_scale is None:
+            return None
+
+        return self.bias * self.bias_scale.exp()
 
     def forward(self, input: Tensor) -> Tensor:
         return torch.nn.functional.linear(
@@ -255,7 +258,11 @@ class ScaledConv1d(nn.Conv1d):
         return self.weight * self.weight_scale.exp()
 
     def get_bias(self):
-        return None if self.bias is None else self.bias * self.bias_scale.exp()
+        bias = self.bias
+        bias_scale = self.bias_scale
+        if bias is None or bias_scale is None:
+            return None
+        return bias * bias_scale.exp()
 
     def forward(self, input: Tensor) -> Tensor:
         F = torch.nn.functional
@@ -269,7 +276,7 @@ class ScaledConv1d(nn.Conv1d):
             self.get_weight(),
             self.get_bias(),
             self.stride,
-            _single(0),
+            (0,),
             self.dilation,
             self.groups,
         )
@@ -319,7 +326,12 @@ class ScaledConv2d(nn.Conv2d):
         return self.weight * self.weight_scale.exp()
 
     def get_bias(self):
-        return None if self.bias is None else self.bias * self.bias_scale.exp()
+        # see https://github.com/pytorch/pytorch/issues/24135
+        bias = self.bias
+        bias_scale = self.bias_scale
+        if bias is None or bias_scale is None:
+            return None
+        return bias * bias_scale.exp()
 
     def _conv_forward(self, input, weight):
         F = torch.nn.functional
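The three get_bias() rewrites (ScaledLinear above, ScaledConv1d and ScaledConv2d here) address the same TorchScript limitation: the old one-line conditional expression mixes None and Tensor, and, per the linked pytorch issue 24135, the compiler refines Optional types most reliably on local variables rather than on repeated self.attr accesses. Below is a standalone sketch of the pattern on a hypothetical module (not the real Scaled* classes); the Optional annotations and the local bindings are what let both the bias=True and bias=False variants compile.

# Minimal sketch (hypothetical module, not the real Scaled* classes) of the
# get_bias() pattern: bind the Optional parameters to locals, check for None
# once, and only then use them, so TorchScript can refine the types
# (see https://github.com/pytorch/pytorch/issues/24135).
from typing import Optional

import torch
import torch.nn as nn


class WithScaledBias(nn.Module):
    # Annotate so TorchScript types these as Optional[Tensor] even when
    # they currently hold a Parameter.
    bias: Optional[torch.Tensor]
    bias_scale: Optional[torch.Tensor]

    def __init__(self, num_channels: int, bias: bool = True) -> None:
        super().__init__()
        if bias:
            self.bias = nn.Parameter(torch.zeros(num_channels))
            self.bias_scale = nn.Parameter(torch.tensor(0.0))
        else:
            self.register_parameter("bias", None)
            self.register_parameter("bias_scale", None)

    def get_bias(self) -> Optional[torch.Tensor]:
        bias = self.bias
        bias_scale = self.bias_scale
        if bias is None or bias_scale is None:
            return None
        return bias * bias_scale.exp()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b = self.get_bias()
        if b is None:
            return x
        return x + b


for bias in (True, False):
    m = torch.jit.script(WithScaledBias(4, bias=bias))
    print(m(torch.zeros(4)))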
@@ -333,7 +345,7 @@ class ScaledConv2d(nn.Conv2d):
             weight,
             self.get_bias(),
             self.stride,
-            _pair(0),
+            (0, 0),
             self.dilation,
             self.groups,
         )
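The two small changes above (`_single(0)` to `(0,)` and `_pair(0)` to `(0, 0)`) drop the `_single`/`_pair` helpers from torch.nn.modules.utils in favour of literal int tuples, presumably because those closure-based helpers are not TorchScript-friendly while tuple literals are. A hypothetical sketch (not from the commit) of passing a literal padding tuple to the functional conv call inside a scriptable module:

# Hypothetical sketch (not from the commit): calling F.conv1d with a
# literal stride/padding/dilation tuple, as the diff does, inside a
# module that is compiled with torch.jit.script.
import torch
import torch.nn as nn
import torch.nn.functional as F


class LiteralPaddingConv(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.weight = nn.Parameter(torch.randn(6, 3, 1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (0,) is a plain int tuple that TorchScript understands directly;
        # helpers such as _single(0) are eager-mode conveniences.
        return F.conv1d(x, self.weight, None, (1,), (0,), (1,), 1)


m = torch.jit.script(LiteralPaddingConv())
print(m(torch.randn(2, 3, 10)).shape)  # torch.Size([2, 6, 10])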
@@ -398,6 +410,9 @@ class ActivationBalancer(torch.nn.Module):
         self.max_abs = max_abs
 
     def forward(self, x: Tensor) -> Tensor:
+        if torch.jit.is_scripting():
+            return x
+
         return ActivationBalancerFunction.apply(
             x,
             self.channel_dim,
@@ -444,6 +459,8 @@ class DoubleSwish(torch.nn.Module):
         """Return double-swish activation function which is an approximation to Swish(Swish(x)),
         that we approximate closely with x * sigmoid(x-1).
         """
+        if torch.jit.is_scripting():
+            return x * torch.sigmoid(x - 1.0)
         return DoubleSwishFunction.apply(x)
 
 
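Both hunks above guard the eager-mode path with torch.jit.is_scripting(): the custom autograd Functions (ActivationBalancerFunction, DoubleSwishFunction) are only needed for training-time gradients and are not scriptable, so the compiled graph falls back to plain tensor math, namely the identity for ActivationBalancer and x * sigmoid(x - 1.0) for DoubleSwish. A self-contained sketch of the same guard on a made-up activation (not the real DoubleSwish):

# Sketch of the torch.jit.is_scripting() guard (made-up activation, not the
# real DoubleSwish): use a custom autograd Function in eager mode, but fall
# back to plain ops when the module is being scripted.
import torch
import torch.nn as nn


class _ToyActivationFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x: torch.Tensor) -> torch.Tensor:
        ctx.save_for_backward(x)
        return x * torch.sigmoid(x - 1.0)

    @staticmethod
    def backward(ctx, grad: torch.Tensor) -> torch.Tensor:
        (x,) = ctx.saved_tensors
        s = torch.sigmoid(x - 1.0)
        # d/dx [x * sigmoid(x - 1)] = s + x * s * (1 - s)
        return grad * (s + x * s * (1.0 - s))


class ToyActivation(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if torch.jit.is_scripting():
            # TorchScript cannot compile autograd.Function.apply, so use the
            # equivalent forward computation directly.
            return x * torch.sigmoid(x - 1.0)
        return _ToyActivationFunction.apply(x)


x = torch.randn(3)
eager = ToyActivation()(x)
scripted = torch.jit.script(ToyActivation())(x)
assert torch.allclose(eager, scripted)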
@@ -132,8 +132,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -192,6 +190,11 @@ def main():
     model.eval()
 
    if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptabe.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
egs/librispeech/ASR/pruned_transducer_stateless3/test_scaling.py (new executable file, 69 lines)

@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+To run this file, do:
+
+    cd icefall/egs/librispeech/ASR
+    python ./pruned_transducer_stateless3/test_scaling.py
+"""
+
+import torch
+from scaling import ActivationBalancer, ScaledConv1d, ScaledConv2d
+
+
+def test_scaled_conv1d():
+    for bias in [True, False]:
+        conv1d = ScaledConv1d(
+            3,
+            6,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias=bias,
+        )
+        torch.jit.script(conv1d)
+
+
+def test_scaled_conv2d():
+    for bias in [True, False]:
+        conv2d = ScaledConv2d(
+            in_channels=1,
+            out_channels=3,
+            kernel_size=3,
+            padding=1,
+            bias=bias,
+        )
+        torch.jit.script(conv2d)
+
+
+def test_activation_balancer():
+    act = ActivationBalancer(
+        channel_dim=1, max_abs=10.0, min_positive=0.05, max_positive=1.0
+    )
+    torch.jit.script(act)
+
+
+def main():
+    test_scaled_conv1d()
+    test_scaled_conv2d()
+    test_activation_balancer()
+
+
+if __name__ == "__main__":
+    main()
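The new test file only exercises ScaledConv1d, ScaledConv2d and ActivationBalancer. Since the same commit also touches ScaledLinear and DoubleSwish in scaling.py, an analogous check for those two could look like the sketch below. This is a suggested companion, not part of the commit; it assumes ScaledLinear forwards its positional arguments to nn.Linear and that DoubleSwish takes no constructor arguments, and like test_scaling.py it would have to be run from egs/librispeech/ASR.

# Suggested companion checks (not part of the commit): script the other two
# classes this commit touches in scaling.py.  Constructor signatures are
# assumed from the hunks above.
import torch

from scaling import DoubleSwish, ScaledLinear


def test_scaled_linear():
    for bias in [True, False]:
        linear = ScaledLinear(10, 20, bias=bias)
        torch.jit.script(linear)


def test_double_swish():
    act = DoubleSwish()
    scripted = torch.jit.script(act)
    x = torch.randn(5)
    # When scripted, DoubleSwish falls back to x * sigmoid(x - 1.0).
    assert torch.allclose(scripted(x), x * torch.sigmoid(x - 1.0))


if __name__ == "__main__":
    test_scaled_linear()
    test_double_swish()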
@@ -109,10 +109,12 @@ class Conformer(Transformer):
         x, pos_emb = self.encoder_pos(x)
         x = x.permute(1, 0, 2)  # (N, T, C) -> (T, N, C)
 
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            # Caution: We assume the subsampling factor is 4!
-            lengths = ((x_lens - 1) // 2 - 1) // 2
+        # Caution: We assume the subsampling factor is 4!
+        #
+        # lengths = ((x_lens - 1) // 2 - 1) // 2 # issue an warning
+        #
+        # Note: rounding_mode in torch.div() is available only in torch >= 1.8.0
+        lengths = (((x_lens - 1) >> 1) - 1) >> 1
         assert x.size(0) == lengths.max().item()
         mask = make_pad_mask(lengths)
 
@@ -183,8 +183,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -226,6 +224,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptabe.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
@@ -115,8 +115,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -158,6 +156,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptabe.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
@@ -184,8 +184,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -229,6 +227,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptabe.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"