diff --git a/README.md b/README.md index a876fb24e..523203aa4 100644 --- a/README.md +++ b/README.md @@ -338,7 +338,7 @@ We provide one model for this recipe: [Pruned stateless RNN-T: Conformer encoder #### Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss -The best results for Chinese CER(%) and English WER(%) respectivly (zh: Chinese, en: English): +The best results for Chinese CER(%) and English WER(%) respectively (zh: Chinese, en: English): |decoding-method | dev | dev_zh | dev_en | test | test_zh | test_en | |--|--|--|--|--|--|--| |greedy_search| 7.30 | 6.48 | 19.19 |7.39| 6.66 | 19.13| diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py index 5adb6c16a..a92182e8d 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py @@ -151,12 +151,14 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_decoder(self, decoder_model_filename: str): self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -170,6 +172,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py b/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py index eee19191e..cf6ddfa36 100755 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py +++ b/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py @@ -152,12 +152,14 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_decoder(self, decoder_model_filename: str): self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -171,6 +173,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/libricss/SURT/prepare.sh b/egs/libricss/SURT/prepare.sh index 028240e44..3d2581d96 100755 --- a/egs/libricss/SURT/prepare.sh +++ b/egs/libricss/SURT/prepare.sh @@ -79,7 +79,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then # ln -sfv /path/to/rirs_noises $dl_dir/ # if [ ! -d $dl_dir/rirs_noises ]; then - lhotse download rirs_noises $dl_dir + lhotse download rir-noise $dl_dir/rirs_noises fi fi @@ -89,6 +89,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then # to $dl_dir/librispeech. We perform text normalization for the transcripts. # NOTE: Alignments are required for this recipe. mkdir -p data/manifests + lhotse prepare librispeech -p train-clean-100 -p train-clean-360 -p train-other-500 -p dev-clean \ -j 4 --alignments-dir $dl_dir/libri_alignments/LibriSpeech $dl_dir/librispeech data/manifests/ fi @@ -112,7 +113,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then # We assume that you have downloaded the RIRS_NOISES corpus # to $dl_dir/rirs_noises - lhotse prepare rir-noise -p real_rir -p iso_noise $dl_dir/rirs_noises data/manifests + lhotse prepare rir-noise -p real_rir -p iso_noise $dl_dir/rirs_noises/RIRS_NOISES data/manifests fi if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py index 5d7e2dfcd..a6c69d54f 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py @@ -136,6 +136,7 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) self.init_encoder_states() @@ -184,6 +185,7 @@ class OnnxModel: self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -197,6 +199,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py index fb9e121e5..06159e56a 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py @@ -129,6 +129,7 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) self.init_encoder_states() @@ -166,6 +167,7 @@ class OnnxModel: self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -179,6 +181,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py b/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py index 34d2e5630..487fc2114 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py @@ -172,30 +172,35 @@ class Model: self.encoder = ort.InferenceSession( args.encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_decoder(self, args): self.decoder = ort.InferenceSession( args.decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_joiner(self, args): self.joiner = ort.InferenceSession( args.joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_joiner_encoder_proj(self, args): self.joiner_encoder_proj = ort.InferenceSession( args.joiner_encoder_proj_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_joiner_decoder_proj(self, args): self.joiner_decoder_proj = ort.InferenceSession( args.joiner_decoder_proj_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def run_encoder(self, x, h0, c0) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py index e10915086..de3e03da6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py @@ -150,12 +150,14 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_decoder(self, decoder_model_filename: str): self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -169,6 +171,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py index 810da8da6..b98248128 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/test_onnx.py @@ -78,6 +78,7 @@ def test_conv2d_subsampling(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -133,6 +134,7 @@ def test_rel_pos(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -220,6 +222,7 @@ def test_conformer_encoder_layer(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -304,6 +307,7 @@ def test_conformer_encoder(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -359,6 +363,7 @@ def test_conformer(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py b/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py index 29be4c655..6e290e799 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py @@ -138,6 +138,7 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) self.init_encoder_states() @@ -185,6 +186,7 @@ class OnnxModel: self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -198,6 +200,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/decoder.py b/egs/librispeech/ASR/pruned_transducer_stateless7/decoder.py index b085a1817..bfd019ff5 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/decoder.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/decoder.py @@ -71,6 +71,10 @@ class Decoder(nn.Module): groups=decoder_dim // 4, # group size == 4 bias=False, ) + else: + # To avoid `RuntimeError: Module 'Decoder' has no attribute 'conv'` + # when inference with torch.jit.script and context_size == 1 + self.conv = nn.Identity() def forward(self, y: torch.Tensor, need_pad: bool = True) -> torch.Tensor: """ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py index 1e9b67226..f3f7b1ea9 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/test_onnx.py @@ -74,6 +74,7 @@ def test_conv2d_subsampling(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -128,6 +129,7 @@ def test_rel_pos(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -204,6 +206,7 @@ def test_zipformer_encoder_layer(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -284,6 +287,7 @@ def test_zipformer_encoder(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() @@ -338,6 +342,7 @@ def test_zipformer(): session = ort.InferenceSession( filename, sess_options=options, + providers=["CPUExecutionProvider"], ) input_nodes = session.get_inputs() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py index 8ff02fbcb..494a34d97 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py @@ -326,41 +326,49 @@ def main(): encoder = ort.InferenceSession( args.encoder_model_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) decoder = ort.InferenceSession( args.decoder_model_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) joiner = ort.InferenceSession( args.joiner_model_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) joiner_encoder_proj = ort.InferenceSession( args.joiner_encoder_proj_model_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) joiner_decoder_proj = ort.InferenceSession( args.joiner_decoder_proj_model_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) lconv = ort.InferenceSession( args.lconv_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) frame_reducer = ort.InferenceSession( args.frame_reducer_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) ctc_output = ort.InferenceSession( args.ctc_output_filename, sess_options=session_opts, + providers=["CPUExecutionProvider"], ) sp = spm.SentencePieceProcessor() diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py index 8192e01fd..04861ea37 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py @@ -130,6 +130,7 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) self.init_encoder_states() @@ -229,6 +230,7 @@ class OnnxModel: self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -242,6 +244,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer.py index a5c422959..c7e45564f 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer.py @@ -865,7 +865,7 @@ class ZipformerEncoderLayer(nn.Module): return final_dropout_rate else: return initial_dropout_rate - ( - initial_dropout_rate * final_dropout_rate + initial_dropout_rate - final_dropout_rate ) * (self.batch_count / warmup_period) def forward( diff --git a/egs/librispeech/ASR/streaming_conformer_ctc/conformer.py b/egs/librispeech/ASR/streaming_conformer_ctc/conformer.py index 5fe92172e..be6fabf35 100644 --- a/egs/librispeech/ASR/streaming_conformer_ctc/conformer.py +++ b/egs/librispeech/ASR/streaming_conformer_ctc/conformer.py @@ -230,7 +230,7 @@ class Conformer(Transformer): x, pos_emb, mask=mask, src_key_padding_mask=src_key_padding_mask ) # (T, B, F) else: - x = self.encoder(x, pos_emb, src_key_padding_mask=mask) # (T, B, F) + x = self.encoder(x, pos_emb, src_key_padding_mask=src_key_padding_mask) # (T, B, F) if self.normalize_before: x = self.after_norm(x) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py index 500b2cd09..e62491444 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py @@ -146,6 +146,7 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) self.init_encoder_states() @@ -236,6 +237,7 @@ class OnnxModel: self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -249,6 +251,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained.py b/egs/librispeech/ASR/zipformer/onnx_pretrained.py index 032b07721..334376093 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained.py @@ -151,12 +151,14 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_decoder(self, decoder_model_filename: str): self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -170,6 +172,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/multi_zh-hans/ASR/prepare.sh b/egs/multi_zh-hans/ASR/prepare.sh index 5d0fe66a4..c09b9c1de 100755 --- a/egs/multi_zh-hans/ASR/prepare.sh +++ b/egs/multi_zh-hans/ASR/prepare.sh @@ -49,7 +49,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then log "Stage 2: Prepare THCHS-30" if [ ! -d $dl_dir/thchs30 ]; then log "Downloading THCHS-30" - lhotse download thchs30 $dl_dir/thchs30 + lhotse download thchs-30 $dl_dir/thchs30 fi if [ ! -f data/manifests/.thchs30.done ]; then diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py b/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py index 3bfb832fb..3485d4005 100755 --- a/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py +++ b/egs/tal_csasr/ASR/pruned_transducer_stateless5/decode.py @@ -724,12 +724,12 @@ def main(): ) save_results( params=params, - test_set_name=test_set, + test_set_name=test_set + "-zh", results_dict=zh_results_dict, ) save_results( params=params, - test_set_name=test_set, + test_set_name=test_set + "-en", results_dict=en_results_dict, ) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/onnx_check.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/onnx_check.py index a46ff5a07..2d46eede1 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/onnx_check.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/onnx_check.py @@ -258,6 +258,7 @@ def main(): encoder_session = ort.InferenceSession( args.onnx_encoder_filename, sess_options=options, + providers=["CPUExecutionProvider"], ) test_encoder(model, encoder_session) @@ -265,6 +266,7 @@ def main(): decoder_session = ort.InferenceSession( args.onnx_decoder_filename, sess_options=options, + providers=["CPUExecutionProvider"], ) test_decoder(model, decoder_session) @@ -272,14 +274,17 @@ def main(): joiner_session = ort.InferenceSession( args.onnx_joiner_filename, sess_options=options, + providers=["CPUExecutionProvider"], ) joiner_encoder_proj_session = ort.InferenceSession( args.onnx_joiner_encoder_proj_filename, sess_options=options, + providers=["CPUExecutionProvider"], ) joiner_decoder_proj_session = ort.InferenceSession( args.onnx_joiner_decoder_proj_filename, sess_options=options, + providers=["CPUExecutionProvider"], ) test_joiner( model, diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py index facfc2258..c31db6859 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py @@ -139,6 +139,7 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) self.init_encoder_states() @@ -186,6 +187,7 @@ class OnnxModel: self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -199,6 +201,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py index e7c8b4556..c784853ee 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py @@ -158,12 +158,14 @@ class OnnxModel: self.encoder = ort.InferenceSession( encoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) def init_decoder(self, decoder_model_filename: str): self.decoder = ort.InferenceSession( decoder_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) decoder_meta = self.decoder.get_modelmeta().custom_metadata_map @@ -177,6 +179,7 @@ class OnnxModel: self.joiner = ort.InferenceSession( joiner_model_filename, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) joiner_meta = self.joiner.get_modelmeta().custom_metadata_map diff --git a/egs/yesno/ASR/tdnn/onnx_pretrained.py b/egs/yesno/ASR/tdnn/onnx_pretrained.py index b23a2a381..72a1d69c8 100755 --- a/egs/yesno/ASR/tdnn/onnx_pretrained.py +++ b/egs/yesno/ASR/tdnn/onnx_pretrained.py @@ -54,6 +54,7 @@ class OnnxModel: self.model = ort.InferenceSession( nn_model, sess_options=self.session_opts, + providers=["CPUExecutionProvider"], ) meta = self.model.get_modelmeta().custom_metadata_map