From 9289dab4d63d5873c1696cf37173876f5f2b8777 Mon Sep 17 00:00:00 2001
From: csukuangfj
Date: Mon, 2 Jan 2023 00:09:27 +0000
Subject: [PATCH] deploy: 2fd970b6821d47dacb2e6513321520db21fff67b

---
 contributing/code-style.html                  |  18 +-
 contributing/doc.html                         |  12 +-
 contributing/how-to-create-a-recipe.html      |  12 +-
 installation/index.html                       |  60 +--
 model-export/export-model-state-dict.html     |  60 +--
 model-export/export-onnx.html                 |  34 +-
 .../export-with-torch-jit-script.html         |  14 +-
 model-export/export-with-torch-jit-trace.html |  32 +-
 .../aishell/conformer_ctc.html                | 264 ++++-----
 .../aishell/stateless_transducer.html         | 202 +++----
 .../aishell/tdnn_lstm_ctc.html                | 158 +++---
 .../librispeech/conformer_ctc.html            | 504 +++++++++---------
 .../pruned_transducer_stateless.html          | 144 ++---
 .../librispeech/tdnn_lstm_ctc.html            | 170 +++---
 .../librispeech/zipformer_ctc_blankskip.html  | 168 +++---
 .../librispeech/zipformer_mmi.html            | 138 ++---
 .../timit/tdnn_ligru_ctc.html                 | 172 +++---
 .../timit/tdnn_lstm_ctc.html                  | 172 +++---
 recipes/Non-streaming-ASR/yesno/tdnn.html     | 190 +++----
 .../lstm_pruned_stateless_transducer.html     | 266 ++++-----
 .../pruned_transducer_stateless.html          | 236 ++++----
 .../librispeech/zipformer_transducer.html     | 238 ++++-----
 22 files changed, 1632 insertions(+), 1632 deletions(-)

diff --git a/contributing/code-style.html b/contributing/code-style.html
index b237d16cd..db1a46ca6 100644
--- a/contributing/code-style.html
+++ b/contributing/code-style.html
@@ -140,22 +140,22 @@ it should succeed this time:

If you want to check the style of your code before git commit, you can do the following:

-
$ pre-commit install
-$ pre-commit run
+
$ pre-commit install
+$ pre-commit run
 

Or without installing the pre-commit hooks:

-
$ cd icefall
-$ pip install black==22.3.0 flake8==5.0.4 isort==5.10.1
-$ black --check your_changed_file.py
-$ black your_changed_file.py  # modify it in-place
+
$ cd icefall
+$ pip install black==22.3.0 flake8==5.0.4 isort==5.10.1
+$ black --check your_changed_file.py
+$ black your_changed_file.py  # modify it in-place
 $
-$ flake8 your_changed_file.py
+$ flake8 your_changed_file.py
 $
-$ isort --check your_changed_file.py  # modify it in-place
-$ isort your_changed_file.py
+$ isort --check your_changed_file.py  # modify it in-place
+$ isort your_changed_file.py
 
diff --git a/contributing/doc.html b/contributing/doc.html
index 952852d91..dcba822e3 100644
--- a/contributing/doc.html
+++ b/contributing/doc.html
@@ -88,8 +88,8 @@ for documentation.

Before writing documentation, you have to prepare the environment:

-
$ cd docs
-$ pip install -r requirements.txt
+
$ cd docs
+$ pip install -r requirements.txt
 
@@ -99,16 +99,16 @@ if you are not familiar with
After writing some documentation, you can build the documentation locally to preview what it looks like if it is published:

-
$ cd docs
-$ make html
+
$ cd docs
+$ make html
 

The generated documentation is in docs/build/html and can be viewed with the following commands:

-
$ cd docs/build/html
-$ python3 -m http.server
+
$ cd docs/build/html
+$ python3 -m http.server
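# Optional (not part of the original commands): http.server listens on port
# 8000 by default; if that port is busy, pass another port number, e.g.
$ python3 -m http.server 8080
# then open http://localhost:8000 (or the port you chose) in a browser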
 
diff --git a/contributing/how-to-create-a-recipe.html b/contributing/how-to-create-a-recipe.html
index cfb4ec2ce..08dccf3d8 100644
--- a/contributing/how-to-create-a-recipe.html
+++ b/contributing/how-to-create-a-recipe.html
@@ -140,12 +140,12 @@ $ touch README.md model.py train.py decode.py asr_datamodule.py pretrained.py

For instance, the yesno recipe has a tdnn model, and its directory structure looks like the following:

egs/yesno/ASR/tdnn/
-|-- README.md
-|-- asr_datamodule.py
-|-- decode.py
-|-- model.py
-|-- pretrained.py
-`-- train.py
+|-- README.md
+|-- asr_datamodule.py
+|-- decode.py
+|-- model.py
+|-- pretrained.py
+`-- train.py
 

File description:

diff --git a/installation/index.html b/installation/index.html
index edf748e0c..fab7b095c 100644
--- a/installation/index.html
+++ b/installation/index.html
@@ -166,11 +166,11 @@ to install lhotse
PYTHONPATH to point to it.

Assume you want to place icefall in the folder /tmp. The following commands show you how to set up icefall:

-
cd /tmp
-git clone https://github.com/k2-fsa/icefall
-cd icefall
-pip install -r requirements.txt
-export PYTHONPATH=/tmp/icefall:$PYTHONPATH
+
cd /tmp
+git clone https://github.com/k2-fsa/icefall
+cd icefall
+pip install -r requirements.txt
+export PYTHONPATH=/tmp/icefall:$PYTHONPATH
 
@@ -185,39 +185,39 @@ to point to the version you want.
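Before moving on, you can verify that the clone is importable with the PYTHONPATH set above. This is an optional sanity check, not part of the official steps; it assumes the clone lives in /tmp/icefall as shown:

$ export PYTHONPATH=/tmp/icefall:$PYTHONPATH
$ python3 -c "import icefall; print(icefall.__file__)"
# should print a path under /tmp/icefall if the setup worked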

The following shows an example of setting up the environment.

(1) Create a virtual environment

-
$ virtualenv -p python3.8  test-icefall
+
$ virtualenv -p python3.8  test-icefall
 
-created virtual environment CPython3.8.6.final.0-64 in 1540ms
-  creator CPython3Posix(dest=/ceph-fj/fangjun/test-icefall, clear=False, no_vcs_ignore=False, global=False)
-  seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=/root/fangjun/.local/share/v
+created virtual environment CPython3.8.6.final.0-64 in 1540ms
+  creator CPython3Posix(dest=/ceph-fj/fangjun/test-icefall, clear=False, no_vcs_ignore=False, global=False)
+  seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=/root/fangjun/.local/share/v
 irtualenv)
-    added seed packages: pip==21.1.3, setuptools==57.4.0, wheel==0.36.2
-  activators BashActivator,CShellActivator,FishActivator,PowerShellActivator,PythonActivator,XonshActivator
+    added seed packages: pip==21.1.3, setuptools==57.4.0, wheel==0.36.2
+  activators BashActivator,CShellActivator,FishActivator,PowerShellActivator,PythonActivator,XonshActivator
 

(2) Activate your virtual environment

-
$ source test-icefall/bin/activate
+
$ source test-icefall/bin/activate
 

(3) Install k2

-
$ pip install k2==1.4.dev20210822+cpu.torch1.9.0 -f https://k2-fsa.org/nightly/index.html
+
$ pip install k2==1.4.dev20210822+cpu.torch1.9.0 -f https://k2-fsa.org/nightly/index.html
 
-Looking in links: https://k2-fsa.org/nightly/index.html
-Collecting k2==1.4.dev20210822+cpu.torch1.9.0
-  Downloading https://k2-fsa.org/nightly/whl/k2-1.4.dev20210822%2Bcpu.torch1.9.0-cp38-cp38-linux_x86_64.whl (1.6 MB)
-     |________________________________| 1.6 MB 185 kB/s
-Collecting graphviz
-  Downloading graphviz-0.17-py3-none-any.whl (18 kB)
-Collecting torch==1.9.0
-  Using cached torch-1.9.0-cp38-cp38-manylinux1_x86_64.whl (831.4 MB)
-Collecting typing-extensions
-  Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)
-Installing collected packages: typing-extensions, torch, graphviz, k2
-Successfully installed graphviz-0.17 k2-1.4.dev20210822+cpu.torch1.9.0 torch-1.9.0 typing-extensions-3.10.0.0
+Looking in links: https://k2-fsa.org/nightly/index.html
+Collecting k2==1.4.dev20210822+cpu.torch1.9.0
+  Downloading https://k2-fsa.org/nightly/whl/k2-1.4.dev20210822%2Bcpu.torch1.9.0-cp38-cp38-linux_x86_64.whl (1.6 MB)
+     |________________________________| 1.6 MB 185 kB/s
+Collecting graphviz
+  Downloading graphviz-0.17-py3-none-any.whl (18 kB)
+Collecting torch==1.9.0
+  Using cached torch-1.9.0-cp38-cp38-manylinux1_x86_64.whl (831.4 MB)
+Collecting typing-extensions
+  Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)
+Installing collected packages: typing-extensions, torch, graphviz, k2
+Successfully installed graphviz-0.17 k2-1.4.dev20210822+cpu.torch1.9.0 torch-1.9.0 typing-extensions-3.10.0.0
 
@@ -393,10 +393,10 @@ the
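If you want to double-check the k2 installation before preparing data, the following one-liner prints k2's build information (an optional check; the exact output depends on the wheel you installed):

$ python3 -m k2.version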

Data preparation

-
$ export PYTHONPATH=/tmp/icefall:$PYTHONPATH
-$ cd /tmp/icefall
-$ cd egs/yesno/ASR
-$ ./prepare.sh
+
$ export PYTHONPATH=/tmp/icefall:$PYTHONPATH
+$ cd /tmp/icefall
+$ cd egs/yesno/ASR
+$ ./prepare.sh
 

The log of running ./prepare.sh is:

@@ -457,7 +457,7 @@ even if there are GPUs available.

Hint

In case you get a Segmentation fault (core dumped) error, please use:

-
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
diff --git a/model-export/export-model-state-dict.html b/model-export/export-model-state-dict.html
index 7b7e0afc3..84dafe496 100644
--- a/model-export/export-model-state-dict.html
+++ b/model-export/export-model-state-dict.html
@@ -123,13 +123,13 @@ as an example.

Note

The steps for other recipes are almost the same.

-
cd egs/librispeech/ASR
+
cd egs/librispeech/ASR
 
-./pruned_transducer_stateless3/export.py \
-  --exp-dir ./pruned_transducer_stateless3/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
-  --epoch 20 \
-  --avg 10
+./pruned_transducer_stateless3/export.py \
+  --exp-dir ./pruned_transducer_stateless3/exp \
+  --bpe-model data/lang_bpe_500/bpe.model \
+  --epoch 20 \
+  --avg 10
 

will generate a file pruned_transducer_stateless3/exp/pretrained.pt, which
@@ -141,10 +141,10 @@ is a dict containing
RESULTS.md of each dataset.
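If you are curious what pretrained.pt contains, you can list its top-level keys with torch.load (a quick, optional check; it assumes the file is the dict produced by export.py as described above):

$ python3 -c "
import torch
d = torch.load('pruned_transducer_stateless3/exp/pretrained.pt', map_location='cpu')
print(list(d.keys()))
"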

In the following, we demonstrate how to use the pretrained model from https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13.

-
cd egs/librispeech/ASR
+
cd egs/librispeech/ASR
 
-git lfs install
-git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
+git lfs install
+git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
 

After cloning the repo with git lfs, you will find several files in the folder
@@ -153,15 +153,15 @@ that have a prefix
exported by the above export.py.

In each recipe, there is also a file pretrained.py, which can use pretrained-xxx.pt to decode waves. The following is an example:

-
cd egs/librispeech/ASR
+
cd egs/librispeech/ASR
 
-./pruned_transducer_stateless3/pretrained.py \
-   --checkpoint ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/exp/pretrained-iter-1224000-avg-14.pt \
-   --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/data/lang_bpe_500/bpe.model \
-   --method greedy_search \
-   ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1089-134686-0001.wav \
-   ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1221-135766-0001.wav \
-   ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1221-135766-0002.wav
+./pruned_transducer_stateless3/pretrained.py \
+   --checkpoint ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/exp/pretrained-iter-1224000-avg-14.pt \
+   --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/data/lang_bpe_500/bpe.model \
+   --method greedy_search \
+   ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1089-134686-0001.wav \
+   ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1221-135766-0001.wav \
+   ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/test_wavs/1221-135766-0002.wav
 

The above commands show how to use the exported model with pretrained.py to
@@ -195,25 +195,25 @@ decode multiple sound files. Its output is given as follows for reference:

When we publish the model, we always note down its WERs on some test dataset in RESULTS.md. This section describes how to use the pretrained model to reproduce the WER.

-
cd egs/librispeech/ASR
-git lfs install
-git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
+
cd egs/librispeech/ASR
+git lfs install
+git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
 
-cd icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/exp
-ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
-cd ../..
+cd icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/exp
+ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
+cd ../..
 

We create a symlink with name epoch-9999.pt to pretrained-iter-1224000-avg-14.pt, so that we can pass --epoch 9999 --avg 1 to decode.py in the following command:

-
./pruned_transducer_stateless3/decode.py \
-    --epoch 9999 \
-    --avg 1 \
-    --exp-dir ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/exp \
-    --lang-dir ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/data/lang_bpe_500 \
-    --max-duration 600 \
-    --decoding-method greedy_search
+
./pruned_transducer_stateless3/decode.py \
+    --epoch 9999 \
+    --avg 1 \
+    --exp-dir ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/exp \
+    --lang-dir ./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13/data/lang_bpe_500 \
+    --max-duration 600 \
+    --decoding-method greedy_search
 

You will find the decoding results in
diff --git a/model-export/export-onnx.html b/model-export/export-onnx.html
index a8362655f..9d3a8ef42 100644
--- a/model-export/export-onnx.html
+++ b/model-export/export-onnx.html
@@ -106,16 +106,16 @@ to run the pretrained model.

We use https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless3 as an example in the following.

-
cd egs/librispeech/ASR
+
cd egs/librispeech/ASR
 epoch=14
 avg=2
 
-./pruned_transducer_stateless3/export.py \
-  --exp-dir ./pruned_transducer_stateless3/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
-  --epoch $epoch \
-  --avg $avg \
-  --onnx 1
+./pruned_transducer_stateless3/export.py \
+  --exp-dir ./pruned_transducer_stateless3/exp \
+  --bpe-model data/lang_bpe_500/bpe.model \
+  --epoch $epoch \
+  --avg $avg \
+  --onnx 1
 

It will generate the following files inside pruned_transducer_stateless3/exp:

@@ -130,16 +130,16 @@ as an example in the following.
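Before running the decoding script, you can confirm that the exported ONNX models load cleanly with onnxruntime (an optional check; it assumes onnxruntime is installed, e.g. via pip install onnxruntime, and uses encoder.onnx, one of the generated files referenced below):

$ python3 -c "
import onnxruntime as ort
sess = ort.InferenceSession('pruned_transducer_stateless3/exp/encoder.onnx', providers=['CPUExecutionProvider'])
print([i.name for i in sess.get_inputs()])
"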

You can use ./pruned_transducer_stateless3/exp/onnx_pretrained.py to decode waves with the generated files:

-
./pruned_transducer_stateless3/onnx_pretrained.py \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
-  --encoder-model-filename ./pruned_transducer_stateless3/exp/encoder.onnx \
-  --decoder-model-filename ./pruned_transducer_stateless3/exp/decoder.onnx \
-  --joiner-model-filename ./pruned_transducer_stateless3/exp/joiner.onnx \
-  --joiner-encoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_encoder_proj.onnx \
-  --joiner-decoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_decoder_proj.onnx \
-  /path/to/foo.wav \
-  /path/to/bar.wav \
-  /path/to/baz.wav
+
./pruned_transducer_stateless3/onnx_pretrained.py \
+  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --encoder-model-filename ./pruned_transducer_stateless3/exp/encoder.onnx \
+  --decoder-model-filename ./pruned_transducer_stateless3/exp/decoder.onnx \
+  --joiner-model-filename ./pruned_transducer_stateless3/exp/joiner.onnx \
+  --joiner-encoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_encoder_proj.onnx \
+  --joiner-decoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_decoder_proj.onnx \
+  /path/to/foo.wav \
+  /path/to/bar.wav \
+  /path/to/baz.wav
 
diff --git a/model-export/export-with-torch-jit-script.html b/model-export/export-with-torch-jit-script.html
index 36714562b..ae0ee3fec 100644
--- a/model-export/export-with-torch-jit-script.html
+++ b/model-export/export-with-torch-jit-script.html
@@ -108,16 +108,16 @@ if you want to use

We use https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless3 as an example in the following.

-
cd egs/librispeech/ASR
+
cd egs/librispeech/ASR
 epoch=14
 avg=1
 
-./pruned_transducer_stateless3/export.py \
-  --exp-dir ./pruned_transducer_stateless3/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
-  --epoch $epoch \
-  --avg $avg \
-  --jit 1
+./pruned_transducer_stateless3/export.py \
+  --exp-dir ./pruned_transducer_stateless3/exp \
+  --bpe-model data/lang_bpe_500/bpe.model \
+  --epoch $epoch \
+  --avg $avg \
+  --jit 1
 

It will generate a file cpu_jit.pt in pruned_transducer_stateless3/exp.
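Since cpu_jit.pt is a TorchScript file, it can be loaded without the Python model definition. A minimal check (optional, not part of the export steps) is:

$ python3 -c "
import torch
m = torch.jit.load('pruned_transducer_stateless3/exp/cpu_jit.pt', map_location='cpu')
print(type(m))
"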

diff --git a/model-export/export-with-torch-jit-trace.html b/model-export/export-with-torch-jit-trace.html
index 4216fe87b..9215ca393 100644
--- a/model-export/export-with-torch-jit-trace.html
+++ b/model-export/export-with-torch-jit-trace.html
@@ -111,14 +111,14 @@ as an example in the following.

iter=468000
 avg=16
 
-cd egs/librispeech/ASR
+cd egs/librispeech/ASR
 
-./lstm_transducer_stateless2/export.py \
-  --exp-dir ./lstm_transducer_stateless2/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
-  --iter $iter \
-  --avg  $avg \
-  --jit-trace 1
+./lstm_transducer_stateless2/export.py \
+  --exp-dir ./lstm_transducer_stateless2/exp \
+  --bpe-model data/lang_bpe_500/bpe.model \
+  --iter $iter \
+  --avg  $avg \
+  --jit-trace 1
 

It will generate three files inside lstm_transducer_stateless2/exp:

@@ -132,15 +132,15 @@ as an example in the following.

You can use https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/lstm_transducer_stateless2/jit_pretrained.py to decode sound files with the following commands:

-
cd egs/librispeech/ASR
-./lstm_transducer_stateless2/jit_pretrained.py \
-  --bpe-model ./data/lang_bpe_500/bpe.model \
-  --encoder-model-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace.pt \
-  --decoder-model-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace.pt \
-  --joiner-model-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace.pt \
-  /path/to/foo.wav \
-  /path/to/bar.wav \
-  /path/to/baz.wav
+
cd egs/librispeech/ASR
+./lstm_transducer_stateless2/jit_pretrained.py \
+  --bpe-model ./data/lang_bpe_500/bpe.model \
+  --encoder-model-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace.pt \
+  --decoder-model-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace.pt \
+  --joiner-model-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace.pt \
+  /path/to/foo.wav \
+  /path/to/bar.wav \
+  /path/to/baz.wav
 
diff --git a/recipes/Non-streaming-ASR/aishell/conformer_ctc.html b/recipes/Non-streaming-ASR/aishell/conformer_ctc.html
index f736b40f7..1689ac219 100644
--- a/recipes/Non-streaming-ASR/aishell/conformer_ctc.html
+++ b/recipes/Non-streaming-ASR/aishell/conformer_ctc.html
@@ -130,8 +130,8 @@ the environment for

Data preparation

-
$ cd egs/aishell/ASR
-$ ./prepare.sh
+
$ cd egs/aishell/ASR
+$ ./prepare.sh
 

The script ./prepare.sh handles the data preparation for you, automagically.
@@ -146,13 +146,13 @@ options:

to control which stage(s) should be run. By default, all stages are executed.

For example,

-
$ cd egs/aishell/ASR
-$ ./prepare.sh --stage 0 --stop-stage 0
+
$ cd egs/aishell/ASR
+$ ./prepare.sh --stage 0 --stop-stage 0
 

means to run only stage 0.

To run stage 2 to stage 5, use:

-
$ ./prepare.sh --stage 2 --stop-stage 5
+
$ ./prepare.sh --stage 2 --stop-stage 5
 
@@ -167,8 +167,8 @@ the dl_dir
Hint

A 3-gram language model will be downloaded from huggingface. We assume you have installed and initialized git-lfs. If not, you could install git-lfs by

-
$ sudo apt-get install git-lfs
-$ git-lfs install
+
$ sudo apt-get install git-lfs
+$ git-lfs install
 

If you don’t have the sudo permission, you could download the
@@ -184,8 +184,8 @@ are saved in ./data

Training

Configurable options

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/train.py --help
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/train.py --help
 

shows you the training options that can be passed from the commandline.
@@ -227,26 +227,26 @@ training from epoch 10, based on the state from epoch 9.

Use case 1: You have 4 GPUs, but you only want to use GPU 0 and GPU 2 for training. You can do the following:

-
$ cd egs/aishell/ASR
-$ export CUDA_VISIBLE_DEVICES="0,2"
-$ ./conformer_ctc/train.py --world-size 2
+
$ cd egs/aishell/ASR
+$ export CUDA_VISIBLE_DEVICES="0,2"
+$ ./conformer_ctc/train.py --world-size 2
 

Use case 2: You have 4 GPUs and you want to use all of them for training. You can do the following:

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/train.py --world-size 4
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/train.py --world-size 4
 

Use case 3: You have 4 GPUs but you only want to use GPU 3 for training. You can do the following:

-
$ cd egs/aishell/ASR
-$ export CUDA_VISIBLE_DEVICES="3"
-$ ./conformer_ctc/train.py --world-size 1
+
$ cd egs/aishell/ASR
+$ export CUDA_VISIBLE_DEVICES="3"
+$ ./conformer_ctc/train.py --world-size 1
 
@@ -299,7 +299,7 @@ Each epoch actually processes
These are checkpoint files, containing model state_dict and optimizer state_dict. To resume training from some checkpoint, say epoch-10.pt, you can use:

-
$ ./conformer_ctc/train.py --start-epoch 11
+
$ ./conformer_ctc/train.py --start-epoch 11
 
@@ -308,8 +308,8 @@ To resume training from some checkpoint, say
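If you want to see what such a checkpoint holds before resuming, you can list its top-level entries (an optional check; the exact keys depend on the icefall version used for training):

$ python3 -c "
import torch
ckpt = torch.load('conformer_ctc/exp/epoch-10.pt', map_location='cpu')
print(sorted(ckpt.keys()))
"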
$ cd conformer_ctc/exp/tensorboard
-$ tensorboard dev upload --logdir . --name "Aishell conformer ctc training with icefall" --description "Training with new LabelSmoothing loss, see https://github.com/k2-fsa/icefall/pull/109"
+
$ cd conformer_ctc/exp/tensorboard
+$ tensorboard dev upload --logdir . --name "Aishell conformer ctc training with icefall" --description "Training with new LabelSmoothing loss, see https://github.com/k2-fsa/icefall/pull/109"
 
@@ -351,25 +351,25 @@ you saw printed to the console during training.

The following shows typical use cases:

Case 1

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/train.py --max-duration 200
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/train.py --max-duration 200
 

It uses --max-duration of 200 to avoid OOM.

Case 2

-
$ cd egs/aishell/ASR
-$ export CUDA_VISIBLE_DEVICES="0,3"
-$ ./conformer_ctc/train.py --world-size 2
+
$ cd egs/aishell/ASR
+$ export CUDA_VISIBLE_DEVICES="0,3"
+$ ./conformer_ctc/train.py --world-size 2
 

It uses GPU 0 and GPU 3 for DDP training.

Case 3

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/train.py --num-epochs 10 --start-epoch 3
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/train.py --num-epochs 10 --start-epoch 3
 

It loads checkpoint ./conformer_ctc/exp/epoch-2.pt and starts
@@ -381,8 +381,8 @@ training from epoch 3. Also, it trains for 10 epochs.

Decoding

The decoding part uses checkpoints saved by the training part, so you have to run the training part first.

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/decode.py --help
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/decode.py --help
 

shows the options for decoding.

@@ -440,27 +440,27 @@ $ git clone https://huggingface.co/pkufool/icefall_asr_aishell_conformer_ctc

In order to use this pre-trained model, your k2 version has to be v1.7 or later.

After downloading, you will have the following files:

-
$ cd egs/aishell/ASR
-$ tree tmp
+
$ cd egs/aishell/ASR
+$ tree tmp
 
tmp/
-`-- icefall_asr_aishell_conformer_ctc
-    |-- README.md
-    |-- data
-    |   `-- lang_char
-    |       |-- HLG.pt
-    |       |-- tokens.txt
-    |       `-- words.txt
-    |-- exp
-    |   `-- pretrained.pt
-    `-- test_waves
-        |-- BAC009S0764W0121.wav
-        |-- BAC009S0764W0122.wav
-        |-- BAC009S0764W0123.wav
-        `-- trans.txt
+`-- icefall_asr_aishell_conformer_ctc
+    |-- README.md
+    |-- data
+    |   `-- lang_char
+    |       |-- HLG.pt
+    |       |-- tokens.txt
+    |       `-- words.txt
+    |-- exp
+    |   `-- pretrained.pt
+    `-- test_waves
+        |-- BAC009S0764W0121.wav
+        |-- BAC009S0764W0122.wav
+        |-- BAC009S0764W0123.wav
+        `-- trans.txt
 
-5 directories, 9 files
+5 directories, 9 files
 

File descriptions:

@@ -502,38 +502,38 @@ Note: We have removed optimizer
The information of the test sound files is listed below:

The information of the test sound files is listed below:

-
$ soxi tmp/icefall_asr_aishell_conformer_ctc/test_waves/*.wav
+
$ soxi tmp/icefall_asr_aishell_conformer_ctc/test_waves/*.wav
 
-Input File     : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'
-Channels       : 1
-Sample Rate    : 16000
-Precision      : 16-bit
-Duration       : 00:00:04.20 = 67263 samples ~ 315.295 CDDA sectors
-File Size      : 135k
-Bit Rate       : 256k
-Sample Encoding: 16-bit Signed Integer PCM
+Input File     : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'
+Channels       : 1
+Sample Rate    : 16000
+Precision      : 16-bit
+Duration       : 00:00:04.20 = 67263 samples ~ 315.295 CDDA sectors
+File Size      : 135k
+Bit Rate       : 256k
+Sample Encoding: 16-bit Signed Integer PCM
 
 
-Input File     : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'
-Channels       : 1
-Sample Rate    : 16000
-Precision      : 16-bit
-Duration       : 00:00:04.12 = 65840 samples ~ 308.625 CDDA sectors
-File Size      : 132k
-Bit Rate       : 256k
-Sample Encoding: 16-bit Signed Integer PCM
+Input File     : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'
+Channels       : 1
+Sample Rate    : 16000
+Precision      : 16-bit
+Duration       : 00:00:04.12 = 65840 samples ~ 308.625 CDDA sectors
+File Size      : 132k
+Bit Rate       : 256k
+Sample Encoding: 16-bit Signed Integer PCM
 
 
-Input File     : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'
-Channels       : 1
-Sample Rate    : 16000
-Precision      : 16-bit
-Duration       : 00:00:04.00 = 64000 samples ~ 300 CDDA sectors
-File Size      : 128k
-Bit Rate       : 256k
-Sample Encoding: 16-bit Signed Integer PCM
+Input File     : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'
+Channels       : 1
+Sample Rate    : 16000
+Precision      : 16-bit
+Duration       : 00:00:04.00 = 64000 samples ~ 300 CDDA sectors
+File Size      : 128k
+Bit Rate       : 256k
+Sample Encoding: 16-bit Signed Integer PCM
 
-Total Duration of 3 files: 00:00:12.32
+Total Duration of 3 files: 00:00:12.32
 
@@ -556,14 +556,14 @@ $ ./conformer_ctc/pretrained.py --help

CTC decoding

CTC decoding uses only the CTC topology for decoding, without a lexicon or language model.

The command to run CTC decoding is:

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/pretrained.py \
-  --checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
-  --tokens-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/tokens.txt \
-  --method ctc-decoding \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/pretrained.py \
+  --checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
+  --tokens-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/tokens.txt \
+  --method ctc-decoding \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
 

The output is given below:

@@ -593,15 +593,15 @@ $ ./conformer_ctc/pretrained.py \

HLG decoding

HLG decoding uses the best path of the decoding lattice as the decoding result.

The command to run HLG decoding is:

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/pretrained.py \
-  --checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
-  --words-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
-  --HLG ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
-  --method 1best \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/pretrained.py \
+  --checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
+  --words-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
+  --HLG ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
+  --method 1best \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
 

The output is given below:

@@ -633,15 +633,15 @@ $ ./conformer_ctc/pretrained.py \

It extracts n paths from the lattice and rescores the extracted paths with an attention decoder. The path with the highest score is the decoding result.

The command to run HLG decoding + attention decoder rescoring is:

-
$ cd egs/aishell/ASR
-$ ./conformer_ctc/pretrained.py \
-  --checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
-  --words-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
-  --HLG ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
-  --method attention-decoder \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
-  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
+
$ cd egs/aishell/ASR
+$ ./conformer_ctc/pretrained.py \
+  --checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
+  --words-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
+  --HLG ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
+  --method attention-decoder \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
+  ./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
 

The output is below:

@@ -693,20 +693,20 @@ Python dependencies.

At present, it does NOT support streaming decoding.

First, let us compile k2 from source:

-
$ cd $HOME
-$ git clone https://github.com/k2-fsa/k2
-$ cd k2
-$ git checkout v2.0-pre
+
$ cd $HOME
+$ git clone https://github.com/k2-fsa/k2
+$ cd k2
+$ git checkout v2.0-pre
 

Caution

You have to switch to the branch v2.0-pre!

-
$ mkdir build-release
-$ cd build-release
-$ cmake -DCMAKE_BUILD_TYPE=Release ..
-$ make -j hlg_decode
+
$ mkdir build-release
+$ cd build-release
+$ cmake -DCMAKE_BUILD_TYPE=Release ..
+$ make -j hlg_decode
 
 # You will find four binaries in `./bin`, i.e. ./bin/hlg_decode,
 
@@ -714,8 +714,8 @@ $ make -j hlg_decode

Now you are ready to go!

Assume you have run:

-
$ cd k2/build-release
-$ ln -s /path/to/icefall-asr-aishell-conformer-ctc ./
+
$ cd k2/build-release
+$ ln -s /path/to/icefall-asr-aishell-conformer-ctc ./
 
@@ -724,40 +724,40 @@ $ ln -s /path/to/icefall-asr-aishell-conformer-ctc ./

It will show you the following message:

-
Please provide --nn_model
+
Please provide --nn_model
 
-This file implements decoding with an HLG decoding graph.
+This file implements decoding with an HLG decoding graph.
 
 Usage:
-  ./bin/hlg_decode \
-    --use_gpu true \
-    --nn_model <path to torch scripted pt file> \
-    --hlg <path to HLG.pt> \
-    --word_table <path to words.txt> \
-    <path to foo.wav> \
-    <path to bar.wav> \
-    <more waves if any>
+  ./bin/hlg_decode \
+    --use_gpu true \
+    --nn_model <path to torch scripted pt file> \
+    --hlg <path to HLG.pt> \
+    --word_table <path to words.txt> \
+    <path to foo.wav> \
+    <path to bar.wav> \
+    <more waves if any>
 
-To see all possible options, use
-  ./bin/hlg_decode --help
+To see all possible options, use
+  ./bin/hlg_decode --help
 
 Caution:
- - Only sound files (*.wav) with single channel are supported.
- - It assumes the model is conformer_ctc/transformer.py from icefall.
-   If you use a different model, you have to change the code
-   related to `model.forward` in this file.
+ - Only sound files (*.wav) with single channel are supported.
+ - It assumes the model is conformer_ctc/transformer.py from icefall.
+   If you use a different model, you have to change the code
+   related to `model.forward` in this file.
 

HLG decoding

-
./bin/hlg_decode \
-  --use_gpu true \
-  --nn_model icefall_asr_aishell_conformer_ctc/exp/cpu_jit.pt \
-  --hlg icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
-  --word_table icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
-  icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
-  icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
-  icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
+
./bin/hlg_decode \
+  --use_gpu true \
+  --nn_model icefall_asr_aishell_conformer_ctc/exp/cpu_jit.pt \
+  --hlg icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
+  --word_table icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
+  icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
+  icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
+  icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
 

The output is:

diff --git a/recipes/Non-streaming-ASR/aishell/stateless_transducer.html b/recipes/Non-streaming-ASR/aishell/stateless_transducer.html
index f3c8456d9..8f6aaa22a 100644
--- a/recipes/Non-streaming-ASR/aishell/stateless_transducer.html
+++ b/recipes/Non-streaming-ASR/aishell/stateless_transducer.html
@@ -211,9 +211,9 @@ alternatives.

Data Preparation

To prepare the data for training, please use the following commands:

-
cd egs/aishell/ASR
-./prepare.sh --stop-stage 4
-./prepare.sh --stage 6 --stop-stage 6
+
cd egs/aishell/ASR
+./prepare.sh --stop-stage 4
+./prepare.sh --stage 6 --stop-stage 6
 
@@ -231,8 +231,8 @@ are not used in transducer training.

Training

-
cd egs/aishell/ASR
-./transducer_stateless_modified/train.py --help
+
cd egs/aishell/ASR
+./transducer_stateless_modified/train.py --help
 

shows you the training options that can be passed from the commandline.
@@ -274,26 +274,26 @@ training from epoch 10, based on the state from epoch 9.

Use case 1: You have 4 GPUs, but you only want to use GPU 0 and GPU 2 for training. You can do the following:

-
$ cd egs/aishell/ASR
-$ export CUDA_VISIBLE_DEVICES="0,2"
-$ ./transducer_stateless_modified/train.py --world-size 2
+
$ cd egs/aishell/ASR
+$ export CUDA_VISIBLE_DEVICES="0,2"
+$ ./transducer_stateless_modified/train.py --world-size 2
 

Use case 2: You have 4 GPUs and you want to use all of them for training. You can do the following:

-
$ cd egs/aishell/ASR
-$ ./transducer_stateless_modified/train.py --world-size 4
+
$ cd egs/aishell/ASR
+$ ./transducer_stateless_modified/train.py --world-size 4
 

Use case 3: You have 4 GPUs but you only want to use GPU 3 for training. You can do the following:

-
$ cd egs/aishell/ASR
-$ export CUDA_VISIBLE_DEVICES="3"
-$ ./transducer_stateless_modified/train.py --world-size 1
+
$ cd egs/aishell/ASR
+$ export CUDA_VISIBLE_DEVICES="3"
+$ ./transducer_stateless_modified/train.py --world-size 1
 
@@ -358,7 +358,7 @@ Each epoch actually processes
These are checkpoint files, containing model state_dict and optimizer state_dict. To resume training from some checkpoint, say epoch-10.pt, you can use:

-
$ ./transducer_stateless_modified/train.py --start-epoch 11
+
$ ./transducer_stateless_modified/train.py --start-epoch 11
 
@@ -367,8 +367,8 @@ To resume training from some checkpoint, say
$ cd transducer_stateless_modified/exp/tensorboard
-$ tensorboard dev upload --logdir . --name "Aishell transducer training with icefall" --description "Training modified transducer, see https://github.com/k2-fsa/icefall/pull/219"
+
$ cd transducer_stateless_modified/exp/tensorboard
+$ tensorboard dev upload --logdir . --name "Aishell transducer training with icefall" --description "Training modified transducer, see https://github.com/k2-fsa/icefall/pull/219"
 
@@ -410,25 +410,25 @@ you saw printed to the console during training.

The following shows typical use cases:

Case 1

-
$ cd egs/aishell/ASR
-$ ./transducer_stateless_modified/train.py --max-duration 250
+
$ cd egs/aishell/ASR
+$ ./transducer_stateless_modified/train.py --max-duration 250
 

It uses --max-duration of 250 to avoid OOM.

Case 2

-
$ cd egs/aishell/ASR
-$ export CUDA_VISIBLE_DEVICES="0,3"
-$ ./transducer_stateless_modified/train.py --world-size 2
+
$ cd egs/aishell/ASR
+$ export CUDA_VISIBLE_DEVICES="0,3"
+$ ./transducer_stateless_modified/train.py --world-size 2
 

It uses GPU 0 and GPU 3 for DDP training.

Case 3

-
$ cd egs/aishell/ASR
-$ ./transducer_stateless_modified/train.py --num-epochs 10 --start-epoch 3
+
$ cd egs/aishell/ASR
+$ ./transducer_stateless_modified/train.py --num-epochs 10 --start-epoch 3
 

It loads checkpoint ./transducer_stateless_modified/exp/epoch-2.pt and starts
@@ -440,8 +440,8 @@ training from epoch 3. Also, it trains for 10 epochs.

Decoding

The decoding part uses checkpoints saved by the training part, so you have to run the training part first.

-
$ cd egs/aishell/ASR
-$ ./transducer_stateless_modified/decode.py --help
+
$ cd egs/aishell/ASR
+$ ./transducer_stateless_modified/decode.py --help
 

shows the options for decoding.

@@ -539,34 +539,34 @@ $ git clone https://huggingface.co/csukuangfj/icefall-aishell-transducer-statele

You have to use git lfs to download the pre-trained model.

After downloading, you will have the following files:

-
$ cd egs/aishell/ASR
-$ tree tmp/icefall-aishell-transducer-stateless-modified-2022-03-01
+
$ cd egs/aishell/ASR
+$ tree tmp/icefall-aishell-transducer-stateless-modified-2022-03-01
 
tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/
-|-- README.md
-|-- data
-|   `-- lang_char
-|       |-- L.pt
-|       |-- lexicon.txt
-|       |-- tokens.txt
-|       `-- words.txt
-|-- exp
-|   `-- pretrained.pt
-|-- log
-|   |-- errs-test-beam_4-epoch-64-avg-33-beam-4.txt
-|   |-- errs-test-greedy_search-epoch-64-avg-33-context-2-max-sym-per-frame-1.txt
-|   |-- log-decode-epoch-64-avg-33-beam-4-2022-03-02-12-05-03
-|   |-- log-decode-epoch-64-avg-33-context-2-max-sym-per-frame-1-2022-02-28-18-13-07
-|   |-- recogs-test-beam_4-epoch-64-avg-33-beam-4.txt
-|   `-- recogs-test-greedy_search-epoch-64-avg-33-context-2-max-sym-per-frame-1.txt
-`-- test_wavs
-    |-- BAC009S0764W0121.wav
-    |-- BAC009S0764W0122.wav
-    |-- BAC009S0764W0123.wav
-    `-- transcript.txt
+|-- README.md
+|-- data
+|   `-- lang_char
+|       |-- L.pt
+|       |-- lexicon.txt
+|       |-- tokens.txt
+|       `-- words.txt
+|-- exp
+|   `-- pretrained.pt
+|-- log
+|   |-- errs-test-beam_4-epoch-64-avg-33-beam-4.txt
+|   |-- errs-test-greedy_search-epoch-64-avg-33-context-2-max-sym-per-frame-1.txt
+|   |-- log-decode-epoch-64-avg-33-beam-4-2022-03-02-12-05-03
+|   |-- log-decode-epoch-64-avg-33-context-2-max-sym-per-frame-1-2022-02-28-18-13-07
+|   |-- recogs-test-beam_4-epoch-64-avg-33-beam-4.txt
+|   `-- recogs-test-greedy_search-epoch-64-avg-33-context-2-max-sym-per-frame-1.txt
+`-- test_wavs
+    |-- BAC009S0764W0121.wav
+    |-- BAC009S0764W0122.wav
+    |-- BAC009S0764W0123.wav
+    `-- transcript.txt
 
-5 directories, 16 files
+5 directories, 16 files
 

File descriptions:

@@ -595,38 +595,38 @@ Note: We have removed optimizer
The information of the test sound files is listed below:

The information of the test sound files is listed below:

-
$ soxi tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/*.wav
+
$ soxi tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/*.wav
 
-Input File     : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav'
-Channels       : 1
-Sample Rate    : 16000
-Precision      : 16-bit
-Duration       : 00:00:04.20 = 67263 samples ~ 315.295 CDDA sectors
-File Size      : 135k
-Bit Rate       : 256k
-Sample Encoding: 16-bit Signed Integer PCM
+Input File     : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav'
+Channels       : 1
+Sample Rate    : 16000
+Precision      : 16-bit
+Duration       : 00:00:04.20 = 67263 samples ~ 315.295 CDDA sectors
+File Size      : 135k
+Bit Rate       : 256k
+Sample Encoding: 16-bit Signed Integer PCM
 
 
-Input File     : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav'
-Channels       : 1
-Sample Rate    : 16000
-Precision      : 16-bit
-Duration       : 00:00:04.12 = 65840 samples ~ 308.625 CDDA sectors
-File Size      : 132k
-Bit Rate       : 256k
-Sample Encoding: 16-bit Signed Integer PCM
+Input File     : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav'
+Channels       : 1
+Sample Rate    : 16000
+Precision      : 16-bit
+Duration       : 00:00:04.12 = 65840 samples ~ 308.625 CDDA sectors
+File Size      : 132k
+Bit Rate       : 256k
+Sample Encoding: 16-bit Signed Integer PCM
 
 
-Input File     : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav'
-Channels       : 1
-Sample Rate    : 16000
-Precision      : 16-bit
-Duration       : 00:00:04.00 = 64000 samples ~ 300 CDDA sectors
-File Size      : 128k
-Bit Rate       : 256k
-Sample Encoding: 16-bit Signed Integer PCM
+Input File     : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav'
+Channels       : 1
+Sample Rate    : 16000
+Precision      : 16-bit
+Duration       : 00:00:04.00 = 64000 samples ~ 300 CDDA sectors
+File Size      : 128k
+Bit Rate       : 256k
+Sample Encoding: 16-bit Signed Integer PCM
 
-Total Duration of 3 files: 00:00:12.32
+Total Duration of 3 files: 00:00:12.32
 
@@ -655,14 +655,14 @@ it may give you poor results.