mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
Update doc about exporting LSTM models to ncnn (#914)
This commit is contained in:
parent
c01175679e
commit
52d7cdd1a6
@ -0,0 +1,18 @@
|
|||||||
|
2023-02-17 11:22:42,862 INFO [export-for-ncnn.py:222] device: cpu
|
||||||
|
2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:231] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'dim_feedforward': 2048, 'decoder_dim': 512, 'joiner_dim': 512, 'is_pnnx': False, 'model_warm_step': 3000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': False, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-dirty', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp'), 'bpe_model': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': 12, 'encoder_dim': 512, 'rnn_hidden_size': 1024, 'aux_layer_period': 0, 'blank_id': 0, 'vocab_size': 500}
|
||||||
|
2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:235] About to create model
|
||||||
|
2023-02-17 11:22:43,239 INFO [train.py:472] Disable giga
|
||||||
|
2023-02-17 11:22:43,249 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/epoch-99.pt
|
||||||
|
2023-02-17 11:22:44,595 INFO [export-for-ncnn.py:324] encoder parameters: 83137520
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:325] decoder parameters: 257024
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:326] joiner parameters: 781812
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:327] total parameters: 84176356
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:329] Using torch.jit.trace()
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:331] Exporting encoder
|
||||||
|
2023-02-17 11:22:48,182 INFO [export-for-ncnn.py:158] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
2023-02-17 11:22:48,183 INFO [export-for-ncnn.py:335] Exporting decoder
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/lstm_transducer_stateless2/decoder.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
need_pad = bool(need_pad)
|
||||||
|
2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:180] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:339] Exporting joiner
|
||||||
|
2023-02-17 11:22:48,304 INFO [export-for-ncnn.py:207] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt
|
@ -0,0 +1,44 @@
|
|||||||
|
Don't Use GPU. has_gpu: 0, config.use_vulkan_compute: 1
|
||||||
|
num encoder conv layers: 28
|
||||||
|
num joiner conv layers: 3
|
||||||
|
num files: 3
|
||||||
|
Processing ../test_wavs/1089-134686-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0002.wav
|
||||||
|
Processing ../test_wavs/1089-134686-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0002.wav
|
||||||
|
----------encoder----------
|
||||||
|
conv_15 : max = 15.942385 threshold = 15.930708 scale = 7.972025
|
||||||
|
conv_16 : max = 44.978855 threshold = 17.031788 scale = 7.456645
|
||||||
|
conv_17 : max = 17.868437 threshold = 7.830528 scale = 16.218575
|
||||||
|
linear_18 : max = 3.107259 threshold = 1.194808 scale = 106.293236
|
||||||
|
linear_19 : max = 6.193777 threshold = 4.634748 scale = 27.401705
|
||||||
|
linear_20 : max = 9.259933 threshold = 2.606617 scale = 48.722160
|
||||||
|
linear_21 : max = 5.186600 threshold = 4.790260 scale = 26.512129
|
||||||
|
linear_22 : max = 9.759041 threshold = 2.265832 scale = 56.050053
|
||||||
|
linear_23 : max = 3.931209 threshold = 3.099090 scale = 40.979767
|
||||||
|
linear_24 : max = 10.324160 threshold = 2.215561 scale = 57.321835
|
||||||
|
linear_25 : max = 3.800708 threshold = 3.599352 scale = 35.284134
|
||||||
|
linear_26 : max = 10.492444 threshold = 3.153369 scale = 40.274391
|
||||||
|
linear_27 : max = 3.660161 threshold = 2.720994 scale = 46.674126
|
||||||
|
linear_28 : max = 9.415265 threshold = 3.174434 scale = 40.007133
|
||||||
|
linear_29 : max = 4.038418 threshold = 3.118534 scale = 40.724262
|
||||||
|
linear_30 : max = 10.072084 threshold = 3.936867 scale = 32.259155
|
||||||
|
linear_31 : max = 4.342712 threshold = 3.599489 scale = 35.282787
|
||||||
|
linear_32 : max = 11.340535 threshold = 3.120308 scale = 40.701103
|
||||||
|
linear_33 : max = 3.846987 threshold = 3.630030 scale = 34.985939
|
||||||
|
linear_34 : max = 10.686298 threshold = 2.204571 scale = 57.607586
|
||||||
|
linear_35 : max = 4.904821 threshold = 4.575518 scale = 27.756420
|
||||||
|
linear_36 : max = 11.806659 threshold = 2.585589 scale = 49.118401
|
||||||
|
linear_37 : max = 6.402340 threshold = 5.047157 scale = 25.162680
|
||||||
|
linear_38 : max = 11.174589 threshold = 1.923361 scale = 66.030258
|
||||||
|
linear_39 : max = 16.178576 threshold = 7.556058 scale = 16.807705
|
||||||
|
linear_40 : max = 12.901954 threshold = 5.301267 scale = 23.956539
|
||||||
|
linear_41 : max = 14.839805 threshold = 7.597429 scale = 16.716181
|
||||||
|
linear_42 : max = 10.178945 threshold = 2.651595 scale = 47.895699
|
||||||
|
----------joiner----------
|
||||||
|
linear_2 : max = 24.829245 threshold = 16.627592 scale = 7.637907
|
||||||
|
linear_1 : max = 10.746186 threshold = 5.255032 scale = 24.167313
|
||||||
|
linear_3 : max = 1.000000 threshold = 0.999756 scale = 127.031013
|
||||||
|
ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...
|
@ -0,0 +1,6 @@
|
|||||||
|
2023-02-17 11:37:30,861 INFO [streaming-ncnn-decode.py:255] {'tokens': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav'}
|
||||||
|
2023-02-17 11:37:31,425 INFO [streaming-ncnn-decode.py:263] Constructing Fbank computer
|
||||||
|
2023-02-17 11:37:31,427 INFO [streaming-ncnn-decode.py:266] Reading sound files: ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-02-17 11:37:31,431 INFO [streaming-ncnn-decode.py:271] torch.Size([106000])
|
||||||
|
2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:342] ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:343] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
749
docs/source/model-export/export-ncnn-conv-emformer.rst
Normal file
749
docs/source/model-export/export-ncnn-conv-emformer.rst
Normal file
@ -0,0 +1,749 @@
|
|||||||
|
.. _export_conv_emformer_transducer_models_to_ncnn:
|
||||||
|
|
||||||
|
Export ConvEmformer transducer models to ncnn
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
We use the pre-trained model from the following repository as an example:
|
||||||
|
|
||||||
|
- `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_
|
||||||
|
|
||||||
|
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
|
||||||
|
may ``not`` work.
|
||||||
|
|
||||||
|
1. Download the pre-trained model
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
You can also refer to `<https://k2-fsa.github.io/sherpa/cpp/pretrained_models/online_transducer.html#icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_ to download the pre-trained model.
|
||||||
|
|
||||||
|
You have to install `git-lfs`_ before you continue.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
||||||
|
|
||||||
|
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
|
||||||
|
|
||||||
|
|
||||||
|
In the above code, we downloaded the pre-trained model into the directory
|
||||||
|
``egs/librispeech/ASR/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05``.
|
||||||
|
|
||||||
|
.. _export_for_ncnn_install_ncnn_and_pnnx:
|
||||||
|
|
||||||
|
2. Install ncnn and pnnx
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# We put ncnn into $HOME/open-source/ncnn
|
||||||
|
# You can change it to anywhere you like
|
||||||
|
|
||||||
|
cd $HOME
|
||||||
|
mkdir -p open-source
|
||||||
|
cd open-source
|
||||||
|
|
||||||
|
git clone https://github.com/csukuangfj/ncnn
|
||||||
|
cd ncnn
|
||||||
|
git submodule update --recursive --init
|
||||||
|
|
||||||
|
# Note: We don't use "python setup.py install" or "pip install ." here
|
||||||
|
|
||||||
|
mkdir -p build-wheel
|
||||||
|
cd build-wheel
|
||||||
|
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DNCNN_PYTHON=ON \
|
||||||
|
-DNCNN_BUILD_BENCHMARK=OFF \
|
||||||
|
-DNCNN_BUILD_EXAMPLES=OFF \
|
||||||
|
-DNCNN_BUILD_TOOLS=ON \
|
||||||
|
..
|
||||||
|
|
||||||
|
make -j4
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
# Note: $PWD here is $HOME/open-source/ncnn
|
||||||
|
|
||||||
|
export PYTHONPATH=$PWD/python:$PYTHONPATH
|
||||||
|
export PATH=$PWD/tools/pnnx/build/src:$PATH
|
||||||
|
export PATH=$PWD/build-wheel/tools/quantize:$PATH
|
||||||
|
|
||||||
|
# Now build pnnx
|
||||||
|
cd tools/pnnx
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make -j4
|
||||||
|
|
||||||
|
./src/pnnx
|
||||||
|
|
||||||
|
Congratulations! You have successfully installed the following components:
|
||||||
|
|
||||||
|
- ``pnnx``, which is an executable located in
|
||||||
|
``$HOME/open-source/ncnn/tools/pnnx/build/src``. We will use
|
||||||
|
it to convert models exported by ``torch.jit.trace()``.
|
||||||
|
- ``ncnn2int8``, which is an executable located in
|
||||||
|
``$HOME/open-source/ncnn/build-wheel/tools/quantize``. We will use
|
||||||
|
it to quantize our models to ``int8``.
|
||||||
|
- ``ncnn.cpython-38-x86_64-linux-gnu.so``, which is a Python module located
|
||||||
|
in ``$HOME/open-source/ncnn/python/ncnn``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
I am using ``Python 3.8``, so it
|
||||||
|
is ``ncnn.cpython-38-x86_64-linux-gnu.so``. If you use a different
|
||||||
|
version, say, ``Python 3.9``, the name would be
|
||||||
|
``ncnn.cpython-39-x86_64-linux-gnu.so``.
|
||||||
|
|
||||||
|
Also, if you are not using Linux, the file name would also be different.
|
||||||
|
But that does not matter. As long as you can compile it, it should work.
|
||||||
|
|
||||||
|
We have set up ``PYTHONPATH`` so that you can use ``import ncnn`` in your
|
||||||
|
Python code. We have also set up ``PATH`` so that you can use
|
||||||
|
``pnnx`` and ``ncnn2int8`` later in your terminal.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please don't use `<https://github.com/tencent/ncnn>`_.
|
||||||
|
We have made some modifications to the official `ncnn`_.
|
||||||
|
|
||||||
|
We will synchronize `<https://github.com/csukuangfj/ncnn>`_ periodically
|
||||||
|
with the official one.
|
||||||
|
|
||||||
|
3. Export the model via torch.jit.trace()
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
First, let us rename our pre-trained model:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp
|
||||||
|
|
||||||
|
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-30.pt
|
||||||
|
|
||||||
|
cd ../..
|
||||||
|
|
||||||
|
Next, we use the following code to export our model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
dir=./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/
|
||||||
|
|
||||||
|
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
|
||||||
|
--exp-dir $dir/exp \
|
||||||
|
--bpe-model $dir/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 30 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
\
|
||||||
|
--num-encoder-layers 12 \
|
||||||
|
--chunk-length 32 \
|
||||||
|
--cnn-module-kernel 31 \
|
||||||
|
--left-context-length 32 \
|
||||||
|
--right-context-length 8 \
|
||||||
|
--memory-size 32 \
|
||||||
|
--encoder-dim 512
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We have renamed our model to ``epoch-30.pt`` so that we can use ``--epoch 30``.
|
||||||
|
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
|
||||||
|
|
||||||
|
If you have trained a model by yourself and if you have all checkpoints
|
||||||
|
available, please first use ``decode.py`` to tune ``--epoch --avg``
|
||||||
|
and select the best combination with ``--use-averaged-model 1``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You will see the following log output:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/export-conv-emformer-transducer-for-ncnn-output.txt
|
||||||
|
|
||||||
|
The log shows the model has ``75490012`` parameters, i.e., ``~75 M``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 289M Jan 11 12:05 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
|
||||||
|
|
||||||
|
You can see that the file size of the pre-trained model is ``289 MB``, which
|
||||||
|
is roughly equal to ``75490012*4/1024/1024 = 287.97 MB``.
|
||||||
|
|
||||||
|
After running ``conv_emformer_transducer_stateless2/export-for-ncnn.py``,
|
||||||
|
we will get the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*pnnx*
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1010K Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
|
||||||
|
.. _conv-emformer-step-4-export-torchscript-model-via-pnnx:
|
||||||
|
|
||||||
|
4. Export torchscript model via pnnx
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Make sure you have set up the ``PATH`` environment variable. Otherwise,
|
||||||
|
it will throw an error saying that ``pnnx`` could not be found.
|
||||||
|
|
||||||
|
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
It will generate the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*ncnn*{bin,param}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 142M Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1.5M Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
There are two types of files:
|
||||||
|
|
||||||
|
- ``param``: It is a text file containing the model architectures. You can
|
||||||
|
use a text editor to view its content.
|
||||||
|
- ``bin``: It is a binary file containing the model parameters.
|
||||||
|
|
||||||
|
We compare the file sizes of the models below before and after converting via ``pnnx``:
|
||||||
|
|
||||||
|
.. see https://tableconvert.com/restructuredtext-generator
|
||||||
|
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+==================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 283 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin | 142 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin | 503 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin | 1.5 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes of the models after conversion are about one half
|
||||||
|
of the models before conversion:
|
||||||
|
|
||||||
|
- encoder: 283 MB vs 142 MB
|
||||||
|
- decoder: 1010 KB vs 503 KB
|
||||||
|
- joiner: 3.0 MB vs 1.5 MB
|
||||||
|
|
||||||
|
The reason is that by default ``pnnx`` converts ``float32`` parameters
|
||||||
|
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
|
||||||
|
for ``float16``. Thus, it is ``half the size`` after conversion.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
|
||||||
|
won't convert ``float32`` to ``float16``.
|
||||||
|
|
||||||
|
5. Test the exported models in icefall
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We assume you have set up the environment variable ``PYTHONPATH`` when
|
||||||
|
building `ncnn`_.
|
||||||
|
|
||||||
|
Now we have successfully converted our pre-trained model to `ncnn`_ format.
|
||||||
|
The generated 6 files are what we need. You can use the following code to
|
||||||
|
test the converted models:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
|
||||||
|
--tokens ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
|
||||||
|
only 1 wave file as input.
|
||||||
|
|
||||||
|
The output is given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/test-streaming-ncnn-decode-conv-emformer-transducer-libri.txt
|
||||||
|
|
||||||
|
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
|
||||||
|
|
||||||
|
|
||||||
|
.. _conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn:
|
||||||
|
|
||||||
|
6. Modify the exported encoder for sherpa-ncnn
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
In order to use the exported models in `sherpa-ncnn`_, we have to modify
|
||||||
|
``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1060 1342
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation** of the above three lines:
|
||||||
|
|
||||||
|
1. ``7767517``, it is a magic number and should not be changed.
|
||||||
|
2. ``1060 1342``, the first number ``1060`` specifies the number of layers
|
||||||
|
in this file, while ``1342`` specifies the number of intermediate outputs
|
||||||
|
of this file
|
||||||
|
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
|
||||||
|
is the layer name of this layer; ``0`` means this layer has no input;
|
||||||
|
``1`` means this layer has one output; ``in0`` is the output name of
|
||||||
|
this layer.
|
||||||
|
|
||||||
|
We need to add 1 extra line and also increment the number of layers.
|
||||||
|
The result looks like below:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1061 1342
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation**
|
||||||
|
|
||||||
|
1. ``7767517``, it is still the same
|
||||||
|
2. ``1061 1342``, we have added an extra layer, so we need to update ``1060`` to ``1061``.
|
||||||
|
We don't need to change ``1342`` since the newly added layer has no inputs or outputs.
|
||||||
|
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512``
|
||||||
|
This line is newly added. Its explanation is given below:
|
||||||
|
|
||||||
|
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
|
||||||
|
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
|
||||||
|
- ``0 0`` means this layer has no inputs or outputs. Must be ``0 0``.
|
||||||
|
- ``0=1``, 0 is the key and 1 is the value. MUST be ``0=1``
|
||||||
|
- ``1=12``, 1 is the key and 12 is the value of the
|
||||||
|
parameter ``--num-encoder-layers`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``2=32``, 2 is the key and 32 is the value of the
|
||||||
|
parameter ``--memory-size`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``3=31``, 3 is the key and 31 is the value of the
|
||||||
|
parameter ``--cnn-module-kernel`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``4=8``, 4 is the key and 8 is the value of the
|
||||||
|
parameter ``--left-context-length`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``5=32``, 5 is the key and 32 is the value of the
|
||||||
|
parameter ``--chunk-length`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``6=8``, 6 is the key and 8 is the value of the
|
||||||
|
parameter ``--right-context-length`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``7=512``, 7 is the key and 512 is the value of the
|
||||||
|
parameter ``--encoder-dim`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
|
||||||
|
For ease of reference, we list the key-value pairs that you need to add
|
||||||
|
in the following table. If your model has a different setting, please
|
||||||
|
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
|
||||||
|
will be ``SAD``.
|
||||||
|
|
||||||
|
+------+-----------------------------+
|
||||||
|
| key | value |
|
||||||
|
+======+=============================+
|
||||||
|
| 0 | 1 (fixed) |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 1 | ``--num-encoder-layers`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 2 | ``--memory-size`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 3 | ``--cnn-module-kernel`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 4 | ``--left-context-length`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 5 | ``--chunk-length`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 6 | ``--right-context-length`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 7 | ``--encoder-dim`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
|
||||||
|
4. ``Input in0 0 1 in0``. No need to change it.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
When you add a new layer ``SherpaMetaData``, please remember to update the
|
||||||
|
number of layers. In our case, update ``1060`` to ``1061``. Otherwise,
|
||||||
|
you will be SAD later.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
After adding the new layer ``SherpaMetaData``, you cannot use this model
|
||||||
|
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
|
||||||
|
supported only in `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
|
||||||
|
the ``param`` file! You don't need to change the ``bin`` file.
|
||||||
|
|
||||||
|
Now you can use this model in `sherpa-ncnn`_.
|
||||||
|
Please refer to the following documentation:
|
||||||
|
|
||||||
|
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
|
||||||
|
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
|
||||||
|
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
|
||||||
|
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
|
||||||
|
|
||||||
|
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
|
||||||
|
|
||||||
|
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
|
||||||
|
|
||||||
|
You can find more usages there.
|
||||||
|
|
||||||
|
7. (Optional) int8 quantization with sherpa-ncnn
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
This step is optional.
|
||||||
|
|
||||||
|
In this step, we describe how to quantize our model with ``int8``.
|
||||||
|
|
||||||
|
Change :ref:`conv-emformer-step-4-export-torchscript-model-via-pnnx` to
|
||||||
|
disable ``fp16`` when using ``pnnx``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt fp16=0
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt fp16=0
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
|
||||||
|
support quantizing the decoder model yet. We will update this documentation
|
||||||
|
once `ncnn`_ supports it. (Maybe later this year, 2023).
|
||||||
|
|
||||||
|
It will generate the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*_jit_trace-pnnx.ncnn.{param,bin}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
Let us compare again the file sizes:
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+========================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes are doubled when we disable ``fp16``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You can again use ``streaming-ncnn-decode.py`` to test the exported models.
|
||||||
|
|
||||||
|
Next, follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to modify ``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Change
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1060 1342
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
to
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1061 1342
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to change the values for ``SherpaMetaData`` if your model uses a different setting.
|
||||||
|
|
||||||
|
|
||||||
|
Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
|
||||||
|
`sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# We will download sherpa-ncnn to $HOME/open-source/
|
||||||
|
# You can change it to anywhere you like.
|
||||||
|
cd $HOME
|
||||||
|
mkdir -p open-source
|
||||||
|
|
||||||
|
cd open-source
|
||||||
|
git clone https://github.com/k2-fsa/sherpa-ncnn
|
||||||
|
cd sherpa-ncnn
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make -j 4
|
||||||
|
|
||||||
|
./bin/generate-int8-scale-table
|
||||||
|
|
||||||
|
export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
|
||||||
|
|
||||||
|
The output of the above commands is:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
(py38) kuangfangjun:build$ generate-int8-scale-table
|
||||||
|
Please provide 10 arg. Currently given: 1
|
||||||
|
Usage:
|
||||||
|
generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
|
||||||
|
|
||||||
|
Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
|
||||||
|
|
||||||
|
We need to create a file ``wave_filenames.txt``, in which we need to put
|
||||||
|
some calibration wave files. For testing purposes, we use the ``test_wavs``
|
||||||
|
from the pre-trained model repository `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
cat <<EOF > wave_filenames.txt
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
../test_wavs/1221-135766-0001.wav
|
||||||
|
../test_wavs/1221-135766-0002.wav
|
||||||
|
EOF
|
||||||
|
|
||||||
|
Now we can calculate the scales needed for quantization with the calibration data:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
generate-int8-scale-table \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder-scale-table.txt \
|
||||||
|
./joiner-scale-table.txt \
|
||||||
|
./wave_filenames.txt
|
||||||
|
|
||||||
|
The output logs are given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/generate-int-8-scale-table-for-conv-emformer.txt
|
||||||
|
|
||||||
|
It generates the following two files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ ls -lh encoder-scale-table.txt joiner-scale-table.txt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 955K Jan 11 17:28 encoder-scale-table.txt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 18K Jan 11 17:28 joiner-scale-table.txt
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
In practice, you definitely need more calibration data to compute the scale table.
|
||||||
|
|
||||||
|
Finally, let us use the scale table to quantize our models into ``int8``.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8
|
||||||
|
|
||||||
|
usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
|
||||||
|
|
||||||
|
First, we quantize the encoder model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./encoder-scale-table.txt
|
||||||
|
|
||||||
|
Next, we quantize the joiner model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./joiner-scale-table.txt
|
||||||
|
|
||||||
|
The above two commands generate the following 4 files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 99M Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 78K Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 774K Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 496 Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
|
||||||
|
Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
|
||||||
|
|
||||||
|
You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
|
||||||
|
and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
|
||||||
|
|
||||||
|
For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
|
||||||
|
replace the following invocation:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
with
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
|
||||||
|
The following table compares again the file sizes:
|
||||||
|
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.int8.bin | 99 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.int8.bin | 774 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes of the model after ``int8`` quantization
|
||||||
|
are much smaller.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Currently, only linear layers and convolutional layers are quantized
|
||||||
|
with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You need to test the recognition accuracy after ``int8`` quantization.
|
||||||
|
|
||||||
|
You can find the speed comparison at `<https://github.com/k2-fsa/sherpa-ncnn/issues/44>`_.
|
||||||
|
|
||||||
|
|
||||||
|
That's it! Have fun with `sherpa-ncnn`_!
|
644
docs/source/model-export/export-ncnn-lstm.rst
Normal file
644
docs/source/model-export/export-ncnn-lstm.rst
Normal file
@ -0,0 +1,644 @@
|
|||||||
|
.. _export_lstm_transducer_models_to_ncnn:
|
||||||
|
|
||||||
|
Export LSTM transducer models to ncnn
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
We use the pre-trained model from the following repository as an example:
|
||||||
|
|
||||||
|
`<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
|
||||||
|
|
||||||
|
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
|
||||||
|
may ``not`` work.
|
||||||
|
|
||||||
|
1. Download the pre-trained model
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
You have to install `git-lfs`_ before you continue.
|
||||||
|
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
|
||||||
|
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
|
||||||
|
|
||||||
|
In the above code, we downloaded the pre-trained model into the directory
|
||||||
|
``egs/librispeech/ASR/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03``.
|
||||||
|
|
||||||
|
2. Install ncnn and pnnx
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx` .
|
||||||
|
|
||||||
|
|
||||||
|
3. Export the model via torch.jit.trace()
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
First, let us rename our pre-trained model:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp
|
||||||
|
|
||||||
|
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
||||||
|
|
||||||
|
cd ../..
|
||||||
|
|
||||||
|
Next, we use the following code to export our model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
dir=./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/export-for-ncnn.py \
|
||||||
|
--exp-dir $dir/exp \
|
||||||
|
--bpe-model $dir/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--num-encoder-layers 12 \
|
||||||
|
--encoder-dim 512 \
|
||||||
|
--rnn-hidden-size 1024
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
|
||||||
|
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
|
||||||
|
|
||||||
|
If you have trained a model by yourself and if you have all checkpoints
|
||||||
|
available, please first use ``decode.py`` to tune ``--epoch --avg``
|
||||||
|
and select the best combination with ``--use-averaged-model 1``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You will see the following log output:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/export-lstm-transducer-for-ncnn-output.txt
|
||||||
|
|
||||||
|
The log shows the model has ``84176356`` parameters, i.e., ``~84 M``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 324M Feb 17 10:34 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
|
||||||
|
|
||||||
|
You can see that the file size of the pre-trained model is ``324 MB``, which
|
||||||
|
is roughly equal to ``84176356*4/1024/1024 = 321.107 MB``.
|
||||||
|
|
||||||
|
After running ``lstm_transducer_stateless2/export-for-ncnn.py``,
|
||||||
|
we will get the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*pnnx.pt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1010K Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 318M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
|
||||||
|
.. _lstm-transducer-step-4-export-torchscript-model-via-pnnx:
|
||||||
|
|
||||||
|
4. Export torchscript model via pnnx
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Make sure you have set up the ``PATH`` environment variable
|
||||||
|
in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
|
||||||
|
it will throw an error saying that ``pnnx`` could not be found.
|
||||||
|
|
||||||
|
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
It will generate the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*ncnn*{bin,param}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 159M Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1.5M Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
|
||||||
|
There are two types of files:
|
||||||
|
|
||||||
|
- ``param``: It is a text file containing the model architectures. You can
|
||||||
|
use a text editor to view its content.
|
||||||
|
- ``bin``: It is a binary file containing the model parameters.
|
||||||
|
|
||||||
|
We compare the file sizes of the models below before and after converting via ``pnnx``:
|
||||||
|
|
||||||
|
.. see https://tableconvert.com/restructuredtext-generator
|
||||||
|
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+==================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 318 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin | 159 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin | 503 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin | 1.5 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes of the models after conversion are about one half
|
||||||
|
of the models before conversion:
|
||||||
|
|
||||||
|
- encoder: 318 MB vs 159 MB
|
||||||
|
- decoder: 1010 KB vs 503 KB
|
||||||
|
- joiner: 3.0 MB vs 1.5 MB
|
||||||
|
|
||||||
|
The reason is that by default ``pnnx`` converts ``float32`` parameters
|
||||||
|
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
|
||||||
|
for ``float16``. Thus, the files are ``half the size`` after conversion.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
|
||||||
|
won't convert ``float32`` to ``float16``.
|
||||||
|
|
||||||
|
5. Test the exported models in icefall
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We assume you have set up the environment variable ``PYTHONPATH`` when
|
||||||
|
building `ncnn`_.
|
||||||
|
|
||||||
|
Now we have successfully converted our pre-trained model to `ncnn`_ format.
|
||||||
|
The generated 6 files are what we need. You can use the following code to
|
||||||
|
test the converted models:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
|
||||||
|
--tokens ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
|
||||||
|
only 1 wave file as input.
|
||||||
|
|
||||||
|
The output is given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/test-streaming-ncnn-decode-lstm-transducer-libri.txt
|
||||||
|
|
||||||
|
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
|
||||||
|
|
||||||
|
.. _lstm-modify-the-exported-encoder-for-sherpa-ncnn:
|
||||||
|
|
||||||
|
6. Modify the exported encoder for sherpa-ncnn
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
In order to use the exported models in `sherpa-ncnn`_, we have to modify
|
||||||
|
``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
7767517
|
||||||
|
267 379
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation** of the above three lines:
|
||||||
|
|
||||||
|
1. ``7767517``, it is a magic number and should not be changed.
|
||||||
|
2. ``267 379``, the first number ``267`` specifies the number of layers
|
||||||
|
in this file, while ``379`` specifies the number of intermediate outputs
|
||||||
|
of this file.
|
||||||
|
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
|
||||||
|
is the layer name of this layer; ``0`` means this layer has no input;
|
||||||
|
``1`` means this layer has one output; ``in0`` is the output name of
|
||||||
|
this layer.
|
||||||
|
|
||||||
|
We need to add 1 extra line and also increment the number of layers.
|
||||||
|
The result looks like below:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
268 379
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation**
|
||||||
|
|
||||||
|
1. ``7767517``, it is still the same
|
||||||
|
2. ``268 379``, we have added an extra layer, so we need to update ``267`` to ``268``.
|
||||||
|
We don't need to change ``379`` since the newly added layer has no inputs or outputs.
|
||||||
|
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024``
|
||||||
|
This line is newly added. Its explanation is given below:
|
||||||
|
|
||||||
|
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
|
||||||
|
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
|
||||||
|
- ``0 0`` means this layer has no inputs or outputs. Must be ``0 0``.
|
||||||
|
- ``0=3``, 0 is the key and 3 is the value. MUST be ``0=3``
|
||||||
|
- ``1=12``, 1 is the key and 12 is the value of the
|
||||||
|
parameter ``--num-encoder-layers`` that you provided when running
|
||||||
|
``./lstm_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``2=512``, 2 is the key and 512 is the value of the
|
||||||
|
parameter ``--encoder-dim`` that you provided when running
|
||||||
|
``./lstm_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``3=1024``, 3 is the key and 1024 is the value of the
|
||||||
|
parameter ``--rnn-hidden-size`` that you provided when running
|
||||||
|
``./lstm_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
|
||||||
|
For ease of reference, we list the key-value pairs that you need to add
|
||||||
|
in the following table. If your model has a different setting, please
|
||||||
|
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
|
||||||
|
will be ``SAD``.
|
||||||
|
|
||||||
|
+------+-----------------------------+
|
||||||
|
| key | value |
|
||||||
|
+======+=============================+
|
||||||
|
| 0 | 3 (fixed) |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 1 | ``--num-encoder-layers`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 2 | ``--encoder-dim`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 3 | ``--rnn-hidden-size`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
|
||||||
|
4. ``Input in0 0 1 in0``. No need to change it.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
When you add a new layer ``SherpaMetaData``, please remember to update the
|
||||||
|
number of layers. In our case, update ``267`` to ``268``. Otherwise,
|
||||||
|
you will be SAD later.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
After adding the new layer ``SherpaMetaData``, you cannot use this model
|
||||||
|
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
|
||||||
|
supported only in `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
|
||||||
|
the ``param`` file! You don't need to change the ``bin`` file.
|
||||||
|
|
||||||
|
Now you can use this model in `sherpa-ncnn`_.
|
||||||
|
Please refer to the following documentation:
|
||||||
|
|
||||||
|
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
|
||||||
|
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
|
||||||
|
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
|
||||||
|
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
|
||||||
|
|
||||||
|
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
|
||||||
|
|
||||||
|
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
|
||||||
|
|
||||||
|
You can find more usages there.
|
||||||
|
|
||||||
|
7. (Optional) int8 quantization with sherpa-ncnn
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
This step is optional.
|
||||||
|
|
||||||
|
In this step, we describe how to quantize our model with ``int8``.
|
||||||
|
|
||||||
|
Change :ref:`lstm-transducer-step-4-export-torchscript-model-via-pnnx` to
|
||||||
|
disable ``fp16`` when using ``pnnx``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt fp16=0
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt fp16=0
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
|
||||||
|
support quantizing the decoder model yet. We will update this documentation
|
||||||
|
once `ncnn`_ supports it. (Maybe this year, 2023).
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*_jit_trace-pnnx.ncnn.{param,bin}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 317M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
|
||||||
|
Let us compare again the file sizes:
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 318 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes are doubled when we disable ``fp16``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You can again use ``streaming-ncnn-decode.py`` to test the exported models.
|
||||||
|
|
||||||
|
Next, follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to modify ``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Change
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
267 379
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
to
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
268 379
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to change the values for ``SherpaMetaData`` if your model uses a different setting.
|
||||||
|
|
||||||
|
Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
|
||||||
|
`sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# We will download sherpa-ncnn to $HOME/open-source/
|
||||||
|
# You can change it to anywhere you like.
|
||||||
|
cd $HOME
|
||||||
|
mkdir -p open-source
|
||||||
|
|
||||||
|
cd open-source
|
||||||
|
git clone https://github.com/k2-fsa/sherpa-ncnn
|
||||||
|
cd sherpa-ncnn
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make -j 4
|
||||||
|
|
||||||
|
./bin/generate-int8-scale-table
|
||||||
|
|
||||||
|
export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
|
||||||
|
|
||||||
|
The output of the above commands is:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
(py38) kuangfangjun:build$ generate-int8-scale-table
|
||||||
|
Please provide 10 arg. Currently given: 1
|
||||||
|
Usage:
|
||||||
|
generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
|
||||||
|
|
||||||
|
Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
|
||||||
|
|
||||||
|
We need to create a file ``wave_filenames.txt``, in which we need to put
|
||||||
|
some calibration wave files. For testing purposes, we use the ``test_wavs``
|
||||||
|
from the pre-trained model repository
|
||||||
|
`<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
cat <<EOF > wave_filenames.txt
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
../test_wavs/1221-135766-0001.wav
|
||||||
|
../test_wavs/1221-135766-0002.wav
|
||||||
|
EOF
|
||||||
|
|
||||||
|
Now we can calculate the scales needed for quantization with the calibration data:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
generate-int8-scale-table \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder-scale-table.txt \
|
||||||
|
./joiner-scale-table.txt \
|
||||||
|
./wave_filenames.txt
|
||||||
|
|
||||||
|
The output logs are given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/generate-int-8-scale-table-for-lstm.txt
|
||||||
|
|
||||||
|
It generates the following two files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh encoder-scale-table.txt joiner-scale-table.txt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 345K Feb 17 12:13 encoder-scale-table.txt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 17K Feb 17 12:13 joiner-scale-table.txt
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Definitely, you need more calibration data to compute the scale table.
|
||||||
|
|
||||||
|
Finally, let us use the scale table to quantize our models into ``int8``.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8
|
||||||
|
|
||||||
|
usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
|
||||||
|
|
||||||
|
First, we quantize the encoder model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./encoder-scale-table.txt
|
||||||
|
|
||||||
|
Next, we quantize the joiner model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./joiner-scale-table.txt
|
||||||
|
|
||||||
|
The above two commands generate the following 4 files:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 218M Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 774K Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 496 Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
|
||||||
|
Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
|
||||||
|
|
||||||
|
You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
|
||||||
|
and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
|
||||||
|
|
||||||
|
For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
|
||||||
|
replace the following invocation:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
with
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
The following table compares the file sizes again:
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 318 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.int8.bin | 218 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.int8.bin | 774 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file size of the joiner model after ``int8`` quantization
|
||||||
|
is much smaller. However, the size of the encoder model is even larger than
|
||||||
|
the ``fp16`` counterpart. The reason is that `ncnn`_ currently does not support
|
||||||
|
quantizing ``LSTM`` layers into ``8-bit``. Please see
|
||||||
|
`<https://github.com/Tencent/ncnn/issues/4532>`_
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Currently, only linear layers and convolutional layers are quantized
|
||||||
|
with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You need to test the recognition accuracy after ``int8`` quantization.
|
||||||
|
|
||||||
|
|
||||||
|
That's it! Have fun with `sherpa-ncnn`_!
|
@ -1,15 +1,26 @@
|
|||||||
Export to ncnn
|
Export to ncnn
|
||||||
==============
|
==============
|
||||||
|
|
||||||
We support exporting both
|
We support exporting the following models
|
||||||
`LSTM transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2>`_
|
to `ncnn <https://github.com/tencent/ncnn>`_:
|
||||||
and
|
|
||||||
`ConvEmformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conv_emformer_transducer_stateless2>`_
|
|
||||||
to `ncnn <https://github.com/tencent/ncnn>`_.
|
|
||||||
|
|
||||||
We also provide `<https://github.com/k2-fsa/sherpa-ncnn>`_
|
- `Zipformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming>`_
|
||||||
performing speech recognition using ``ncnn`` with exported models.
|
|
||||||
It has been tested on Linux, macOS, Windows, ``Android``, and ``Raspberry Pi``.
|
- `LSTM transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2>`_
|
||||||
|
|
||||||
|
- `ConvEmformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conv_emformer_transducer_stateless2>`_
|
||||||
|
|
||||||
|
We also provide `sherpa-ncnn`_
|
||||||
|
for performing speech recognition using `ncnn`_ with exported models.
|
||||||
|
It has been tested on the following platforms:
|
||||||
|
|
||||||
|
- Linux
|
||||||
|
- macOS
|
||||||
|
- Windows
|
||||||
|
- ``Android``
|
||||||
|
- ``iOS``
|
||||||
|
- ``Raspberry Pi``
|
||||||
|
- `爱芯派 <https://wiki.sipeed.com/hardware/zh/>`_ (`MAIX-III AXera-Pi <https://wiki.sipeed.com/hardware/en/maixIII/ax-pi/axpi.html>`_).
|
||||||
|
|
||||||
`sherpa-ncnn`_ is self-contained and can be statically linked to produce
|
`sherpa-ncnn`_ is self-contained and can be statically linked to produce
|
||||||
a binary containing everything needed. Please refer
|
a binary containing everything needed. Please refer
|
||||||
@ -18,754 +29,7 @@ to its documentation for details:
|
|||||||
- `<https://k2-fsa.github.io/sherpa/ncnn/index.html>`_
|
- `<https://k2-fsa.github.io/sherpa/ncnn/index.html>`_
|
||||||
|
|
||||||
|
|
||||||
Export LSTM transducer models
|
.. toctree::
|
||||||
-----------------------------
|
|
||||||
|
|
||||||
Please refer to :ref:`export-lstm-transducer-model-for-ncnn` for details.
|
export-ncnn-conv-emformer
|
||||||
|
export-ncnn-lstm
|
||||||
|
|
||||||
|
|
||||||
Export ConvEmformer transducer models
|
|
||||||
-------------------------------------
|
|
||||||
|
|
||||||
We use the pre-trained model from the following repository as an example:
|
|
||||||
|
|
||||||
- `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_
|
|
||||||
|
|
||||||
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
We use ``Ubuntu 18.04``, ``torch 1.10``, and ``Python 3.8`` for testing.
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
|
|
||||||
may ``not`` work.
|
|
||||||
|
|
||||||
1. Download the pre-trained model
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
You can also refer to `<https://k2-fsa.github.io/sherpa/cpp/pretrained_models/online_transducer.html#icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_ to download the pre-trained model.
|
|
||||||
|
|
||||||
You have to install `git-lfs`_ before you continue.
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
|
|
||||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
|
||||||
|
|
||||||
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
|
|
||||||
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
|
||||||
|
|
||||||
cd ..
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
We download ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
|
|
||||||
|
|
||||||
|
|
||||||
In the above code, we download the pre-trained model into the directory
|
|
||||||
``egs/librispeech/ASR/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05``.
|
|
||||||
|
|
||||||
2. Install ncnn and pnnx
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
# We put ncnn into $HOME/open-source/ncnn
|
|
||||||
# You can change it to anywhere you like
|
|
||||||
|
|
||||||
cd $HOME
|
|
||||||
mkdir -p open-source
|
|
||||||
cd open-source
|
|
||||||
|
|
||||||
git clone https://github.com/csukuangfj/ncnn
|
|
||||||
cd ncnn
|
|
||||||
git submodule update --recursive --init
|
|
||||||
|
|
||||||
# Note: We don't use "python setup.py install" or "pip install ." here
|
|
||||||
|
|
||||||
mkdir -p build-wheel
|
|
||||||
cd build-wheel
|
|
||||||
|
|
||||||
cmake \
|
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
|
||||||
-DNCNN_PYTHON=ON \
|
|
||||||
-DNCNN_BUILD_BENCHMARK=OFF \
|
|
||||||
-DNCNN_BUILD_EXAMPLES=OFF \
|
|
||||||
-DNCNN_BUILD_TOOLS=ON \
|
|
||||||
..
|
|
||||||
|
|
||||||
make -j4
|
|
||||||
|
|
||||||
cd ..
|
|
||||||
|
|
||||||
# Note: $PWD here is $HOME/open-source/ncnn
|
|
||||||
|
|
||||||
export PYTHONPATH=$PWD/python:$PYTHONPATH
|
|
||||||
export PATH=$PWD/tools/pnnx/build/src:$PATH
|
|
||||||
export PATH=$PWD/build-wheel/tools/quantize:$PATH
|
|
||||||
|
|
||||||
# Now build pnnx
|
|
||||||
cd tools/pnnx
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
make -j4
|
|
||||||
|
|
||||||
./src/pnnx
|
|
||||||
|
|
||||||
Congratulations! You have successfully installed the following components:
|
|
||||||
|
|
||||||
- ``pnnx``, which is an executable located in
|
|
||||||
``$HOME/open-source/ncnn/tools/pnnx/build/src``. We will use
|
|
||||||
it to convert models exported by ``torch.jit.trace()``.
|
|
||||||
- ``ncnn2int8``, which is an executable located in
|
|
||||||
``$HOME/open-source/ncnn/build-wheel/tools/quantize``. We will use
|
|
||||||
it to quantize our models to ``int8``.
|
|
||||||
- ``ncnn.cpython-38-x86_64-linux-gnu.so``, which is a Python module located
|
|
||||||
in ``$HOME/open-source/ncnn/python/ncnn``.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
I am using ``Python 3.8``, so it
|
|
||||||
is ``ncnn.cpython-38-x86_64-linux-gnu.so``. If you use a different
|
|
||||||
version, say, ``Python 3.9``, the name would be
|
|
||||||
``ncnn.cpython-39-x86_64-linux-gnu.so``.
|
|
||||||
|
|
||||||
Also, if you are not using Linux, the file name would also be different.
|
|
||||||
But that does not matter. As long as you can compile it, it should work.
|
|
||||||
|
|
||||||
We have set up ``PYTHONPATH`` so that you can use ``import ncnn`` in your
|
|
||||||
Python code. We have also set up ``PATH`` so that you can use
|
|
||||||
``pnnx`` and ``ncnn2int8`` later in your terminal.
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
Please don't use `<https://github.com/tencent/ncnn>`_.
|
|
||||||
We have made some modifications to the official `ncnn`_.
|
|
||||||
|
|
||||||
We will synchronize `<https://github.com/csukuangfj/ncnn>`_ periodically
|
|
||||||
with the official one.
|
|
||||||
|
|
||||||
3. Export the model via torch.jit.trace()
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
First, let us rename our pre-trained model:
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp
|
|
||||||
|
|
||||||
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-30.pt
|
|
||||||
|
|
||||||
cd ../..
|
|
||||||
|
|
||||||
Next, we use the following code to export our model:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
dir=./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/
|
|
||||||
|
|
||||||
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
|
|
||||||
--exp-dir $dir/exp \
|
|
||||||
--bpe-model $dir/data/lang_bpe_500/bpe.model \
|
|
||||||
--epoch 30 \
|
|
||||||
--avg 1 \
|
|
||||||
--use-averaged-model 0 \
|
|
||||||
\
|
|
||||||
--num-encoder-layers 12 \
|
|
||||||
--chunk-length 32 \
|
|
||||||
--cnn-module-kernel 31 \
|
|
||||||
--left-context-length 32 \
|
|
||||||
--right-context-length 8 \
|
|
||||||
--memory-size 32 \
|
|
||||||
--encoder-dim 512
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
We have renamed our model to ``epoch-30.pt`` so that we can use ``--epoch 30``.
|
|
||||||
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
|
|
||||||
|
|
||||||
If you have trained a model by yourself and if you have all checkpoints
|
|
||||||
available, please first use ``decode.py`` to tune ``--epoch --avg``
|
|
||||||
and select the best combination with ``--use-averaged-model 1``.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
You will see the following log output:
|
|
||||||
|
|
||||||
.. literalinclude:: ./code/export-conv-emformer-transducer-for-ncnn-output.txt
|
|
||||||
|
|
||||||
The log shows the model has ``75490012`` parameters, i.e., ``~75 M``.
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
|
|
||||||
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 289M Jan 11 12:05 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
|
|
||||||
|
|
||||||
You can see that the file size of the pre-trained model is ``289 MB``, which
|
|
||||||
is roughly ``75490012*4/1024/1024 = 287.97 MB``.
|
|
||||||
|
|
||||||
After running ``conv_emformer_transducer_stateless2/export-for-ncnn.py``,
|
|
||||||
we will get the following files:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*pnnx*
|
|
||||||
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 1010K Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.pt
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.pt
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.pt
|
|
||||||
|
|
||||||
|
|
||||||
.. _conv-emformer-step-3-export-torchscript-model-via-pnnx:
|
|
||||||
|
|
||||||
3. Export torchscript model via pnnx
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
Make sure you have set up the ``PATH`` environment variable. Otherwise,
|
|
||||||
it will throw an error saying that ``pnnx`` could not be found.
|
|
||||||
|
|
||||||
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
|
||||||
|
|
||||||
pnnx ./encoder_jit_trace-pnnx.pt
|
|
||||||
pnnx ./decoder_jit_trace-pnnx.pt
|
|
||||||
pnnx ./joiner_jit_trace-pnnx.pt
|
|
||||||
|
|
||||||
It will generate the following files:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*ncnn*{bin,param}
|
|
||||||
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 142M Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 1.5M Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
|
|
||||||
|
|
||||||
There are two types of files:
|
|
||||||
|
|
||||||
- ``param``: It is a text file containing the model architectures. You can
|
|
||||||
use a text editor to view its content.
|
|
||||||
- ``bin``: It is a binary file containing the model parameters.
|
|
||||||
|
|
||||||
We compare the file sizes of the models below before and after converting via ``pnnx``:
|
|
||||||
|
|
||||||
.. see https://tableconvert.com/restructuredtext-generator
|
|
||||||
|
|
||||||
+----------------------------------+------------+
|
|
||||||
| File name | File size |
|
|
||||||
+==================================+============+
|
|
||||||
| encoder_jit_trace-pnnx.pt | 283 MB |
|
|
||||||
+----------------------------------+------------+
|
|
||||||
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
|
||||||
+----------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
|
||||||
+----------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.ncnn.bin | 142 MB |
|
|
||||||
+----------------------------------+------------+
|
|
||||||
| decoder_jit_trace-pnnx.ncnn.bin | 503 KB |
|
|
||||||
+----------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.ncnn.bin | 1.5 MB |
|
|
||||||
+----------------------------------+------------+
|
|
||||||
|
|
||||||
You can see that the file sizes of the models after conversion are about one half
|
|
||||||
of the models before conversion:
|
|
||||||
|
|
||||||
- encoder: 283 MB vs 142 MB
|
|
||||||
- decoder: 1010 KB vs 503 KB
|
|
||||||
- joiner: 3.0 MB vs 1.5 MB
|
|
||||||
|
|
||||||
The reason is that by default ``pnnx`` converts ``float32`` parameters
|
|
||||||
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
|
|
||||||
for ``float16``. Thus, the converted model is about half the size of the original.
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
|
|
||||||
won't convert ``float32`` to ``float16``.
|
|
||||||
|
|
||||||
4. Test the exported models in icefall
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
We assume you have set up the environment variable ``PYTHONPATH`` when
|
|
||||||
building `ncnn`_.
|
|
||||||
|
|
||||||
Now we have successfully converted our pre-trained model to `ncnn`_ format.
|
|
||||||
The generated 6 files are what we need. You can use the following code to
|
|
||||||
test the converted models:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
|
|
||||||
--tokens ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt \
|
|
||||||
--encoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--encoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--decoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--decoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--joiner-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
--joiner-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
|
|
||||||
only 1 wave file as input.
|
|
||||||
|
|
||||||
The output is given below:
|
|
||||||
|
|
||||||
.. literalinclude:: ./code/test-stremaing-ncnn-decode-conv-emformer-transducer-libri.txt
|
|
||||||
|
|
||||||
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
|
|
||||||
|
|
||||||
|
|
||||||
.. _conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn:
|
|
||||||
|
|
||||||
5. Modify the exported encoder for sherpa-ncnn
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
In order to use the exported models in `sherpa-ncnn`_, we have to modify
|
|
||||||
``encoder_jit_trace-pnnx.ncnn.param``.
|
|
||||||
|
|
||||||
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
7767517
|
|
||||||
1060 1342
|
|
||||||
Input in0 0 1 in0
|
|
||||||
|
|
||||||
**Explanation** of the above three lines:
|
|
||||||
|
|
||||||
1. ``7767517``, it is a magic number and should not be changed.
|
|
||||||
2. ``1060 1342``, the first number ``1060`` specifies the number of layers
|
|
||||||
in this file, while ``1342`` specifies the number of intermediate outputs
|
|
||||||
of this file
|
|
||||||
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
|
|
||||||
is the layer name of this layer; ``0`` means this layer has no input;
|
|
||||||
``1`` means this layer has one output; ``in0`` is the output name of
|
|
||||||
this layer.
|
|
||||||
|
|
||||||
We need to add 1 extra line and also increment the number of layers.
|
|
||||||
The result looks like below:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
7767517
|
|
||||||
1061 1342
|
|
||||||
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
|
|
||||||
Input in0 0 1 in0
|
|
||||||
|
|
||||||
**Explanation**
|
|
||||||
|
|
||||||
1. ``7767517``, it is still the same
|
|
||||||
2. ``1061 1342``, we have added an extra layer, so we need to update ``1060`` to ``1061``.
|
|
||||||
We don't need to change ``1342`` since the newly added layer has no inputs or outputs.
|
|
||||||
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512``
|
|
||||||
This line is newly added. Its explanation is given below:
|
|
||||||
|
|
||||||
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
|
|
||||||
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
|
|
||||||
- ``0 0`` means this layer has no inputs or outputs. Must be ``0 0``.
|
|
||||||
- ``0=1``, 0 is the key and 1 is the value. MUST be ``0=1``
|
|
||||||
- ``1=12``, 1 is the key and 12 is the value of the
|
|
||||||
parameter ``--num-encoder-layers`` that you provided when running
|
|
||||||
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
|
||||||
- ``2=32``, 2 is the key and 32 is the value of the
|
|
||||||
parameter ``--memory-size`` that you provided when running
|
|
||||||
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
|
||||||
- ``3=31``, 3 is the key and 31 is the value of the
|
|
||||||
parameter ``--cnn-module-kernel`` that you provided when running
|
|
||||||
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
|
||||||
- ``4=8``, 4 is the key and 8 is the value of the
|
|
||||||
parameter ``--left-context-length`` that you provided when running
|
|
||||||
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
|
||||||
- ``5=32``, 5 is the key and 32 is the value of the
|
|
||||||
parameter ``--chunk-length`` that you provided when running
|
|
||||||
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
|
||||||
- ``6=8``, 6 is the key and 8 is the value of the
|
|
||||||
parameter ``--right-context-length`` that you provided when running
|
|
||||||
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
|
||||||
- ``7=512``, 7 is the key and 512 is the value of the
|
|
||||||
parameter ``--encoder-dim`` that you provided when running
|
|
||||||
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
|
||||||
|
|
||||||
For ease of reference, we list the key-value pairs that you need to add
|
|
||||||
in the following table. If your model has a different setting, please
|
|
||||||
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
|
|
||||||
will be ``SAD``.
|
|
||||||
|
|
||||||
+------+-----------------------------+
|
|
||||||
| key | value |
|
|
||||||
+======+=============================+
|
|
||||||
| 0 | 1 (fixed) |
|
|
||||||
+------+-----------------------------+
|
|
||||||
| 1 | ``--num-encoder-layers`` |
|
|
||||||
+------+-----------------------------+
|
|
||||||
| 2 | ``--memory-size`` |
|
|
||||||
+------+-----------------------------+
|
|
||||||
| 3 | ``--cnn-module-kernel`` |
|
|
||||||
+------+-----------------------------+
|
|
||||||
| 4 | ``--left-context-length`` |
|
|
||||||
+------+-----------------------------+
|
|
||||||
| 5 | ``--chunk-length`` |
|
|
||||||
+------+-----------------------------+
|
|
||||||
| 6 | ``--right-context-length`` |
|
|
||||||
+------+-----------------------------+
|
|
||||||
| 7 | ``--encoder-dim`` |
|
|
||||||
+------+-----------------------------+
|
|
||||||
|
|
||||||
4. ``Input in0 0 1 in0``. No need to change it.
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
When you add a new layer ``SherpaMetaData``, please remember to update the
|
|
||||||
number of layers. In our case, update ``1060`` to ``1061``. Otherwise,
|
|
||||||
you will be SAD later.
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
After adding the new layer ``SherpaMetaData``, you cannot use this model
|
|
||||||
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
|
|
||||||
supported only in `sherpa-ncnn`_.
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
|
|
||||||
the ``param`` file! You don't need to change the ``bin`` file.
|
|
||||||
|
|
||||||
Now you can use this model in `sherpa-ncnn`_.
|
|
||||||
Please refer to the following documentation:
|
|
||||||
|
|
||||||
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
|
|
||||||
- Android: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
|
|
||||||
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
|
|
||||||
|
|
||||||
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
|
|
||||||
|
|
||||||
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
|
|
||||||
|
|
||||||
You can find more usages there.
|
|
||||||
|
|
||||||
6. (Optional) int8 quantization with sherpa-ncnn
|
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
||||||
|
|
||||||
This step is optional.
|
|
||||||
|
|
||||||
In this step, we describe how to quantize our model with ``int8``.
|
|
||||||
|
|
||||||
Change :ref:`conv-emformer-step-3-export-torchscript-model-via-pnnx` to
|
|
||||||
disable ``fp16`` when using ``pnnx``:
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
|
||||||
|
|
||||||
pnnx ./encoder_jit_trace-pnnx.pt fp16=0
|
|
||||||
pnnx ./decoder_jit_trace-pnnx.pt
|
|
||||||
pnnx ./joiner_jit_trace-pnnx.pt fp16=0
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
|
|
||||||
support quantizing the decoder model yet. We will update this documentation
|
|
||||||
once `ncnn`_ supports it (possibly later this year, 2023).
|
|
||||||
|
|
||||||
It will generate the following files
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*_jit_trace-pnnx.ncnn.{param,bin}
|
|
||||||
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
|
|
||||||
|
|
||||||
Let us compare the file sizes again:
|
|
||||||
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| File name | File size |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.pt | 283 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
|
|
||||||
You can see that the file sizes are doubled when we disable ``fp16``.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
You can again use ``streaming-ncnn-decode.py`` to test the exported models.
|
|
||||||
|
|
||||||
Next, follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
|
|
||||||
to modify ``encoder_jit_trace-pnnx.ncnn.param``.
|
|
||||||
|
|
||||||
Change
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
7767517
|
|
||||||
1060 1342
|
|
||||||
Input in0 0 1 in0
|
|
||||||
|
|
||||||
to
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
7767517
|
|
||||||
1061 1342
|
|
||||||
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
|
|
||||||
Input in0 0 1 in0
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
Please follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
|
|
||||||
to change the values for ``SherpaMetaData`` if your model uses a different setting.
|
|
||||||
|
|
||||||
|
|
||||||
Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
|
|
||||||
`sherpa-ncnn`_.
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
# We will download sherpa-ncnn to $HOME/open-source/
|
|
||||||
# You can change it to anywhere you like.
|
|
||||||
cd $HOME
|
|
||||||
mkdir -p open-source
|
|
||||||
|
|
||||||
cd open-source
|
|
||||||
git clone https://github.com/k2-fsa/sherpa-ncnn
|
|
||||||
cd sherpa-ncnn
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
make -j 4
|
|
||||||
|
|
||||||
./bin/generate-int8-scale-table
|
|
||||||
|
|
||||||
export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
|
|
||||||
|
|
||||||
The output of the above commands is:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
(py38) kuangfangjun:build$ generate-int8-scale-table
|
|
||||||
Please provide 10 arg. Currently given: 1
|
|
||||||
Usage:
|
|
||||||
generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
|
|
||||||
|
|
||||||
Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
|
|
||||||
|
|
||||||
We need to create a file ``wave_filenames.txt``, in which we need to put
|
|
||||||
some calibration wave files. For testing purposes, we use the ``test_wavs``
|
|
||||||
from the pre-trained model repository `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
|
||||||
|
|
||||||
cat <<EOF > wave_filenames.txt
|
|
||||||
../test_wavs/1089-134686-0001.wav
|
|
||||||
../test_wavs/1221-135766-0001.wav
|
|
||||||
../test_wavs/1221-135766-0002.wav
|
|
||||||
EOF
|
|
||||||
|
|
||||||
Now we can calculate the scales needed for quantization with the calibration data:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
|
||||||
|
|
||||||
generate-int8-scale-table \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
./decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./encoder-scale-table.txt \
|
|
||||||
./joiner-scale-table.txt \
|
|
||||||
./wave_filenames.txt
|
|
||||||
|
|
||||||
The output logs are as follows:
|
|
||||||
|
|
||||||
.. literalinclude:: ./code/generate-int-8-scale-table-for-conv-emformer.txt
|
|
||||||
|
|
||||||
It generates the following two files:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
$ ls -lh encoder-scale-table.txt joiner-scale-table.txt
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 955K Jan 11 17:28 encoder-scale-table.txt
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 18K Jan 11 17:28 joiner-scale-table.txt
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
In practice, you definitely need more calibration data to compute the scale table.
|
|
||||||
|
|
||||||
Finally, let us use the scale table to quantize our models into ``int8``.
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
ncnn2int8
|
|
||||||
|
|
||||||
usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
|
|
||||||
|
|
||||||
First, we quantize the encoder model:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
|
||||||
|
|
||||||
ncnn2int8 \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
|
||||||
./encoder-scale-table.txt
|
|
||||||
|
|
||||||
Next, we quantize the joiner model:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
ncnn2int8 \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.int8.param \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.int8.bin \
|
|
||||||
./joiner-scale-table.txt
|
|
||||||
|
|
||||||
The above two commands generate the following 4 files:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 99M Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 78K Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.param
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 774K Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.bin
|
|
||||||
-rw-r--r-- 1 kuangfangjun root 496 Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.param
|
|
||||||
|
|
||||||
Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
|
|
||||||
|
|
||||||
.. caution::
|
|
||||||
|
|
||||||
``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
|
|
||||||
|
|
||||||
You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
|
|
||||||
and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
|
|
||||||
|
|
||||||
For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
|
|
||||||
replace the following invocation:
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
|
||||||
|
|
||||||
sherpa-ncnn \
|
|
||||||
../data/lang_bpe_500/tokens.txt \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
./decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
../test_wavs/1089-134686-0001.wav
|
|
||||||
|
|
||||||
with
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
|
||||||
|
|
||||||
sherpa-ncnn \
|
|
||||||
../data/lang_bpe_500/tokens.txt \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
|
||||||
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
|
||||||
./decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
./decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
./joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
../test_wavs/1089-134686-0001.wav
|
|
||||||
|
|
||||||
|
|
||||||
The following table compares the file sizes again:
|
|
||||||
|
|
||||||
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| File name | File size |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.pt | 283 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| encoder_jit_trace-pnnx.ncnn.int8.bin | 99 MB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
| joiner_jit_trace-pnnx.ncnn.int8.bin | 774 KB |
|
|
||||||
+----------------------------------------+------------+
|
|
||||||
|
|
||||||
You can see that the file sizes of the model after ``int8`` quantization
|
|
||||||
are much smaller.
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
Currently, only linear layers and convolutional layers are quantized
|
|
||||||
with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
You need to test the recognition accuracy after ``int8`` quantization.
|
|
||||||
|
|
||||||
You can find the speed comparison at `<https://github.com/k2-fsa/sherpa-ncnn/issues/44>`_.
|
|
||||||
|
|
||||||
|
|
||||||
That's it! Have fun with `sherpa-ncnn`_!
|
|
||||||
|
@ -10,7 +10,7 @@ There is also a file named ``onnx_pretrained.py``, which you can use
|
|||||||
the exported `ONNX`_ model in Python with `onnxruntime`_ to decode sound files.
|
the exported `ONNX`_ model in Python with `onnxruntime`_ to decode sound files.
|
||||||
|
|
||||||
Example
|
Example
|
||||||
=======
|
-------
|
||||||
|
|
||||||
In the following, we demonstrate how to export a streaming Zipformer pre-trained
|
In the following, we demonstrate how to export a streaming Zipformer pre-trained
|
||||||
model from
|
model from
|
||||||
|
@ -515,132 +515,6 @@ To use the generated files with ``./lstm_transducer_stateless2/jit_pretrained``:
|
|||||||
Please see `<https://k2-fsa.github.io/sherpa/python/streaming_asr/lstm/english/server.html>`_
|
Please see `<https://k2-fsa.github.io/sherpa/python/streaming_asr/lstm/english/server.html>`_
|
||||||
for how to use the exported models in ``sherpa``.
|
for how to use the exported models in ``sherpa``.
|
||||||
|
|
||||||
.. _export-lstm-transducer-model-for-ncnn:
|
|
||||||
|
|
||||||
Export LSTM transducer models for ncnn
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
We support exporting pretrained LSTM transducer models to
|
|
||||||
`ncnn <https://github.com/tencent/ncnn>`_ using
|
|
||||||
`pnnx <https://github.com/Tencent/ncnn/tree/master/tools/pnnx>`_.
|
|
||||||
|
|
||||||
First, let us install a modified version of ``ncnn``:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
git clone https://github.com/csukuangfj/ncnn
|
|
||||||
cd ncnn
|
|
||||||
git submodule update --recursive --init
|
|
||||||
|
|
||||||
# Note: We don't use "python setup.py install" or "pip install ." here
|
|
||||||
|
|
||||||
mkdir -p build-wheel
|
|
||||||
cd build-wheel
|
|
||||||
|
|
||||||
cmake \
|
|
||||||
-DCMAKE_BUILD_TYPE=Release \
|
|
||||||
-DNCNN_PYTHON=ON \
|
|
||||||
-DNCNN_BUILD_BENCHMARK=OFF \
|
|
||||||
-DNCNN_BUILD_EXAMPLES=OFF \
|
|
||||||
-DNCNN_BUILD_TOOLS=ON \
|
|
||||||
..
|
|
||||||
|
|
||||||
make -j4
|
|
||||||
|
|
||||||
cd ..
|
|
||||||
|
|
||||||
# Note: $PWD here is /path/to/ncnn
|
|
||||||
|
|
||||||
export PYTHONPATH=$PWD/python:$PYTHONPATH
|
|
||||||
export PATH=$PWD/tools/pnnx/build/src:$PATH
|
|
||||||
export PATH=$PWD/build-wheel/tools/quantize:$PATH
|
|
||||||
|
|
||||||
# now build pnnx
|
|
||||||
cd tools/pnnx
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
make -j4
|
|
||||||
|
|
||||||
./src/pnnx
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
We assume that you have added the path to the binary ``pnnx`` to the
|
|
||||||
environment variable ``PATH``.
|
|
||||||
|
|
||||||
We also assume that you have added ``build/tools/quantize`` to the environment
|
|
||||||
variable ``PATH`` so that you are able to use ``ncnn2int8`` later.
|
|
||||||
|
|
||||||
Second, let us export the model using ``torch.jit.trace()`` that is suitable
|
|
||||||
for ``pnnx``:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
iter=468000
|
|
||||||
avg=16
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/export-for-ncnn.py \
|
|
||||||
--exp-dir ./lstm_transducer_stateless2/exp \
|
|
||||||
--bpe-model data/lang_bpe_500/bpe.model \
|
|
||||||
--iter $iter \
|
|
||||||
--avg $avg
|
|
||||||
|
|
||||||
It will generate 3 files:
|
|
||||||
|
|
||||||
- ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt``
|
|
||||||
- ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt``
|
|
||||||
- ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt``
|
|
||||||
|
|
||||||
Third, convert torchscript model to ``ncnn`` format:
|
|
||||||
|
|
||||||
.. code-block::
|
|
||||||
|
|
||||||
pnnx ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt
|
|
||||||
pnnx ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt
|
|
||||||
pnnx ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt
|
|
||||||
|
|
||||||
It will generate the following files:
|
|
||||||
|
|
||||||
- ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param``
|
|
||||||
- ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin``
|
|
||||||
- ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param``
|
|
||||||
- ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin``
|
|
||||||
- ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param``
|
|
||||||
- ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin``
|
|
||||||
|
|
||||||
To use the above generated files, run:
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/ncnn-decode.py \
|
|
||||||
--tokens ./data/lang_bpe_500/tokens.txt \
|
|
||||||
--encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
--joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
/path/to/foo.wav
|
|
||||||
|
|
||||||
.. code-block:: bash
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/streaming-ncnn-decode.py \
|
|
||||||
--tokens ./data/lang_bpe_500/tokens.txt \
|
|
||||||
--encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
--joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
/path/to/foo.wav
|
|
||||||
|
|
||||||
To use the above generated files in C++, please see
|
|
||||||
`<https://github.com/k2-fsa/sherpa-ncnn>`_
|
|
||||||
|
|
||||||
It is able to generate a static linked executable that can be run on Linux, Windows,
|
|
||||||
macOS, Raspberry Pi, etc, without external dependencies.
|
|
||||||
|
|
||||||
Download pretrained models
|
Download pretrained models
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user