mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-11 02:52:18 +00:00
Add ctc decoding to pretrained.py on conformer_ctc (#75)
* Add ctc-decoding to pretrained.py
* update pretrained.py and conformer_ctc.rst
* update ctc-decoding for pretrained.py on conformer_ctc
* Update pretrained.py
* fix the style issue
* Update conformer_ctc.rst
* Update the running logs
This commit is contained in:
parent
391432b356
commit
39bc8cae94
@ -429,6 +429,7 @@ After downloading, you will have the following files:
|
|||||||
|-- README.md
|
|-- README.md
|
||||||
|-- data
|
|-- data
|
||||||
| |-- lang_bpe
|
| |-- lang_bpe
|
||||||
|
| | |-- Linv.pt
|
||||||
| | |-- HLG.pt
|
| | |-- HLG.pt
|
||||||
| | |-- bpe.model
|
| | |-- bpe.model
|
||||||
| | |-- tokens.txt
|
| | |-- tokens.txt
|
||||||
@ -446,6 +447,9 @@ After downloading, you will have the following files:
|
|||||||
6 directories, 11 files
|
6 directories, 11 files
|
||||||
|
|
||||||
**File descriptions**:
|
**File descriptions**:
|
||||||
|
- ``data/lang_bpe/Linv.pt``
|
||||||
|
|
||||||
|
It is the lexicon file, with word IDs as labels and token IDs as aux_labels.
|
||||||
|
|
||||||
- ``data/lang_bpe/HLG.pt``
|
- ``data/lang_bpe/HLG.pt``
|
||||||
|
|
||||||
@ -527,12 +531,58 @@ Usage
|
|||||||
|
|
||||||
displays the help information.
|
displays the help information.
|
||||||
|
|
||||||
It supports three decoding methods:
|
It supports 4 decoding methods:
|
||||||
|
|
||||||
|
- CTC decoding
|
||||||
- HLG decoding
|
- HLG decoding
|
||||||
- HLG + n-gram LM rescoring
|
- HLG + n-gram LM rescoring
|
||||||
- HLG + n-gram LM rescoring + attention decoder rescoring
|
- HLG + n-gram LM rescoring + attention decoder rescoring
|
||||||
|
|
||||||
|
CTC decoding
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
|
||||||
|
CTC decoding uses the best path of the decoding lattice as the decoding result
|
||||||
|
without any LM or lexicon.
|
||||||
|
|
||||||
|
The command to run CTC decoding is:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ cd egs/librispeech/ASR
|
||||||
|
$ ./conformer_ctc/pretrained.py \
|
||||||
|
--checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
|
||||||
|
--lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
|
||||||
|
--method ctc-decoding \
|
||||||
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac \
|
||||||
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac \
|
||||||
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac
|
||||||
|
|
||||||
|
The output is given below:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
2021-10-13 11:21:50,896 INFO [pretrained.py:236] device: cuda:0
|
||||||
|
2021-10-13 11:21:50,896 INFO [pretrained.py:238] Creating model
|
||||||
|
2021-10-13 11:21:56,669 INFO [pretrained.py:255] Constructing Fbank computer
|
||||||
|
2021-10-13 11:21:56,670 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
|
||||||
|
2021-10-13 11:21:56,683 INFO [pretrained.py:271] Decoding started
|
||||||
|
2021-10-13 11:21:57,341 INFO [pretrained.py:290] Building CTC topology
|
||||||
|
2021-10-13 11:21:57,625 INFO [lexicon.py:113] Loading pre-compiled tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/Linv.pt
|
||||||
|
2021-10-13 11:21:57,679 INFO [pretrained.py:299] Loading BPE model
|
||||||
|
2021-10-13 11:22:00,076 INFO [pretrained.py:314] Use CTC decoding
|
||||||
|
2021-10-13 11:22:00,087 INFO [pretrained.py:400]
|
||||||
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
|
||||||
|
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||||
|
|
||||||
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac:
|
||||||
|
GOD AS A DIRECT CONSEQUENCE OF THE SIN WHICH MAN THUS PUNISHED HAD GIVEN HER A LOVELY CHILD WHOSE PLACE WAS ON THAT SAME DISHONOURED
|
||||||
|
BOSOM TO CONNECT HER PARENT FOR EVER WITH THE RACE AND DESCENT OF MORTALS AND TO BE FINALLY A BLESSED SOUL IN HEAVEN
|
||||||
|
|
||||||
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
|
||||||
|
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
||||||
|
|
||||||
|
2021-10-13 11:22:00,087 INFO [pretrained.py:402] Decoding Done
|
||||||
|
|
||||||
HLG decoding
|
HLG decoding
|
||||||
^^^^^^^^^^^^
|
^^^^^^^^^^^^
|
||||||
|
|
||||||
@ -545,8 +595,7 @@ The command to run HLG decoding is:
|
|||||||
$ cd egs/librispeech/ASR
|
$ cd egs/librispeech/ASR
|
||||||
$ ./conformer_ctc/pretrained.py \
|
$ ./conformer_ctc/pretrained.py \
|
||||||
--checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
|
--checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
|
||||||
--words-file ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/words.txt \
|
--lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
|
||||||
--HLG ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt \
|
|
||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac \
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac \
|
||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac \
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac \
|
||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac
|
||||||
@ -555,14 +604,14 @@ The output is given below:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
2021-08-20 11:03:05,712 INFO [pretrained.py:217] device: cuda:0
|
2021-10-13 11:25:19,458 INFO [pretrained.py:236] device: cuda:0
|
||||||
2021-08-20 11:03:05,712 INFO [pretrained.py:219] Creating model
|
2021-10-13 11:25:19,458 INFO [pretrained.py:238] Creating model
|
||||||
2021-08-20 11:03:11,345 INFO [pretrained.py:238] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
|
2021-10-13 11:25:25,342 INFO [pretrained.py:255] Constructing Fbank computer
|
||||||
2021-08-20 11:03:18,442 INFO [pretrained.py:255] Constructing Fbank computer
|
2021-10-13 11:25:25,343 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
|
||||||
2021-08-20 11:03:18,444 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
|
2021-10-13 11:25:25,356 INFO [pretrained.py:271] Decoding started
|
||||||
2021-08-20 11:03:18,507 INFO [pretrained.py:271] Decoding started
|
2021-10-13 11:25:26,026 INFO [pretrained.py:327] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
|
||||||
2021-08-20 11:03:18,795 INFO [pretrained.py:300] Use HLG decoding
|
2021-10-13 11:25:33,735 INFO [pretrained.py:359] Use HLG decoding
|
||||||
2021-08-20 11:03:19,149 INFO [pretrained.py:339]
|
2021-10-13 11:25:34,013 INFO [pretrained.py:400]
|
||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
|
||||||
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||||
|
|
||||||
@ -573,7 +622,7 @@ The output is given below:
|
|||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
|
||||||
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
||||||
|
|
||||||
2021-08-20 11:03:19,149 INFO [pretrained.py:341] Decoding Done
|
2021-10-13 11:25:34,014 INFO [pretrained.py:402] Decoding Done
|
||||||
|
|
||||||
HLG decoding + LM rescoring
|
HLG decoding + LM rescoring
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
@ -588,8 +637,7 @@ The command to run HLG decoding + LM rescoring is:
|
|||||||
$ cd egs/librispeech/ASR
|
$ cd egs/librispeech/ASR
|
||||||
$ ./conformer_ctc/pretrained.py \
|
$ ./conformer_ctc/pretrained.py \
|
||||||
--checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
|
--checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
|
||||||
--words-file ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/words.txt \
|
--lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
|
||||||
--HLG ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt \
|
|
||||||
--method whole-lattice-rescoring \
|
--method whole-lattice-rescoring \
|
||||||
--G ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt \
|
--G ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt \
|
||||||
--ngram-lm-scale 0.8 \
|
--ngram-lm-scale 0.8 \
|
||||||
@ -601,15 +649,15 @@ Its output is:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
2021-08-20 11:12:17,565 INFO [pretrained.py:217] device: cuda:0
|
2021-10-13 11:28:19,129 INFO [pretrained.py:236] device: cuda:0
|
||||||
2021-08-20 11:12:17,565 INFO [pretrained.py:219] Creating model
|
2021-10-13 11:28:19,129 INFO [pretrained.py:238] Creating model
|
||||||
2021-08-20 11:12:23,728 INFO [pretrained.py:238] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
|
2021-10-13 11:28:23,531 INFO [pretrained.py:255] Constructing Fbank computer
|
||||||
2021-08-20 11:12:30,035 INFO [pretrained.py:246] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
|
2021-10-13 11:28:23,532 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
|
||||||
2021-08-20 11:13:10,779 INFO [pretrained.py:255] Constructing Fbank computer
|
2021-10-13 11:28:23,544 INFO [pretrained.py:271] Decoding started
|
||||||
2021-08-20 11:13:10,787 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
|
2021-10-13 11:28:24,141 INFO [pretrained.py:327] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
|
||||||
2021-08-20 11:13:10,798 INFO [pretrained.py:271] Decoding started
|
2021-10-13 11:28:30,752 INFO [pretrained.py:338] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
|
||||||
2021-08-20 11:13:11,085 INFO [pretrained.py:305] Use HLG decoding + LM rescoring
|
2021-10-13 11:28:48,308 INFO [pretrained.py:364] Use HLG decoding + LM rescoring
|
||||||
2021-08-20 11:13:11,736 INFO [pretrained.py:339]
|
2021-10-13 11:28:48,815 INFO [pretrained.py:400]
|
||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
|
||||||
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||||
|
|
||||||
@ -620,7 +668,7 @@ Its output is:
|
|||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
|
||||||
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
||||||
|
|
||||||
2021-08-20 11:13:11,737 INFO [pretrained.py:341] Decoding Done
|
2021-10-13 11:28:48,815 INFO [pretrained.py:402] Decoding Done
|
||||||
|
|
||||||
HLG decoding + LM rescoring + attention decoder rescoring
|
HLG decoding + LM rescoring + attention decoder rescoring
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
@ -636,8 +684,7 @@ The command to run HLG decoding + LM rescoring + attention decoder rescoring is:
|
|||||||
$ cd egs/librispeech/ASR
|
$ cd egs/librispeech/ASR
|
||||||
$ ./conformer_ctc/pretrained.py \
|
$ ./conformer_ctc/pretrained.py \
|
||||||
--checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
|
--checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
|
||||||
--words-file ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/words.txt \
|
--lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
|
||||||
--HLG ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt \
|
|
||||||
--method attention-decoder \
|
--method attention-decoder \
|
||||||
--G ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt \
|
--G ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt \
|
||||||
--ngram-lm-scale 1.3 \
|
--ngram-lm-scale 1.3 \
|
||||||
@ -654,15 +701,15 @@ The output is below:
|
|||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
2021-08-20 11:19:11,397 INFO [pretrained.py:217] device: cuda:0
|
2021-10-13 11:29:50,106 INFO [pretrained.py:236] device: cuda:0
|
||||||
2021-08-20 11:19:11,397 INFO [pretrained.py:219] Creating model
|
2021-10-13 11:29:50,106 INFO [pretrained.py:238] Creating model
|
||||||
2021-08-20 11:19:17,354 INFO [pretrained.py:238] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
|
2021-10-13 11:29:56,063 INFO [pretrained.py:255] Constructing Fbank computer
|
||||||
2021-08-20 11:19:24,615 INFO [pretrained.py:246] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
|
2021-10-13 11:29:56,063 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
|
||||||
2021-08-20 11:20:04,576 INFO [pretrained.py:255] Constructing Fbank computer
|
2021-10-13 11:29:56,077 INFO [pretrained.py:271] Decoding started
|
||||||
2021-08-20 11:20:04,584 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
|
2021-10-13 11:29:56,770 INFO [pretrained.py:327] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
|
||||||
2021-08-20 11:20:04,595 INFO [pretrained.py:271] Decoding started
|
2021-10-13 11:30:04,023 INFO [pretrained.py:338] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
|
||||||
2021-08-20 11:20:04,854 INFO [pretrained.py:313] Use HLG + LM rescoring + attention decoder rescoring
|
2021-10-13 11:30:18,163 INFO [pretrained.py:372] Use HLG + LM rescoring + attention decoder rescoring
|
||||||
2021-08-20 11:20:05,805 INFO [pretrained.py:339]
|
2021-10-13 11:30:19,367 INFO [pretrained.py:400]
|
||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
|
||||||
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||||
|
|
||||||
@ -673,7 +720,7 @@ The output is below:
|
|||||||
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
|
./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
|
||||||
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
||||||
|
|
||||||
2021-08-20 11:20:05,805 INFO [pretrained.py:341] Decoding Done
|
2021-10-13 11:30:19,367 INFO [pretrained.py:402] Decoding Done
|
||||||
|
|
||||||
Colab notebook
|
Colab notebook
|
||||||
--------------
|
--------------
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
|
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang,
|
||||||
|
# Mingshuang Luo)
|
||||||
#
|
#
|
||||||
# See ../../../../LICENSE for clarification regarding multiple authors
|
# See ../../../../LICENSE for clarification regarding multiple authors
|
||||||
#
|
#
|
||||||
@ -19,6 +20,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
|
import sentencepiece as spm
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import k2
|
import k2
|
||||||
@ -28,6 +30,7 @@ import torchaudio
|
|||||||
from conformer import Conformer
|
from conformer import Conformer
|
||||||
from torch.nn.utils.rnn import pad_sequence
|
from torch.nn.utils.rnn import pad_sequence
|
||||||
|
|
||||||
|
from icefall.lexicon import Lexicon
|
||||||
from icefall.decode import (
|
from icefall.decode import (
|
||||||
get_lattice,
|
get_lattice,
|
||||||
one_best_decoding,
|
one_best_decoding,
|
||||||
@ -52,14 +55,10 @@ def get_parser():
|
|||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--words-file",
|
"--lang-dir",
|
||||||
type=str,
|
type=str,
|
||||||
required=True,
|
required=True,
|
||||||
help="Path to words.txt",
|
help="Path to lang bpe dir.",
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"--HLG", type=str, required=True, help="Path to HLG.pt."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -68,6 +67,10 @@ def get_parser():
|
|||||||
default="1best",
|
default="1best",
|
||||||
help="""Decoding method.
|
help="""Decoding method.
|
||||||
Possible values are:
|
Possible values are:
|
||||||
|
(0) ctc-decoding - Use CTC decoding. It uses a sentence
|
||||||
|
piece model, i.e., lang_dir/bpe.model, to convert
|
||||||
|
word pieces to words. It needs neither a lexicon
|
||||||
|
nor an n-gram LM.
|
||||||
(1) 1best - Use the best path as decoding output. Only
|
(1) 1best - Use the best path as decoding output. Only
|
||||||
the transformer encoder output is used for decoding.
|
the transformer encoder output is used for decoding.
|
||||||
We call it HLG decoding.
|
We call it HLG decoding.
|
||||||
@ -249,23 +252,6 @@ def main():
|
|||||||
model.to(device)
|
model.to(device)
|
||||||
model.eval()
|
model.eval()
|
||||||
|
|
||||||
logging.info(f"Loading HLG from {params.HLG}")
|
|
||||||
HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
|
|
||||||
HLG = HLG.to(device)
|
|
||||||
if not hasattr(HLG, "lm_scores"):
|
|
||||||
# For whole-lattice-rescoring and attention-decoder
|
|
||||||
HLG.lm_scores = HLG.scores.clone()
|
|
||||||
|
|
||||||
if params.method in ["whole-lattice-rescoring", "attention-decoder"]:
|
|
||||||
logging.info(f"Loading G from {params.G}")
|
|
||||||
G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
|
|
||||||
# Add epsilon self-loops to G as we will compose
|
|
||||||
# it with the whole lattice later
|
|
||||||
G = G.to(device)
|
|
||||||
G = k2.add_epsilon_self_loops(G)
|
|
||||||
G = k2.arc_sort(G)
|
|
||||||
G.lm_scores = G.scores.clone()
|
|
||||||
|
|
||||||
logging.info("Constructing Fbank computer")
|
logging.info("Constructing Fbank computer")
|
||||||
opts = kaldifeat.FbankOptions()
|
opts = kaldifeat.FbankOptions()
|
||||||
opts.device = device
|
opts.device = device
|
||||||
@ -299,6 +285,67 @@ def main():
|
|||||||
dtype=torch.int32,
|
dtype=torch.int32,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if params.method == "ctc-decoding":
|
||||||
|
logging.info("Building CTC topology")
|
||||||
|
lexicon = Lexicon(params.lang_dir)
|
||||||
|
max_token_id = max(lexicon.tokens)
|
||||||
|
H = k2.ctc_topo(
|
||||||
|
max_token=max_token_id,
|
||||||
|
modified=False,
|
||||||
|
device=device,
|
||||||
|
)
|
||||||
|
|
||||||
|
logging.info("Loading BPE model")
|
||||||
|
bpe_model = spm.SentencePieceProcessor()
|
||||||
|
bpe_model.load(params.lang_dir + "/bpe.model")
|
||||||
|
|
||||||
|
lattice = get_lattice(
|
||||||
|
nnet_output=nnet_output,
|
||||||
|
decoding_graph=H,
|
||||||
|
supervision_segments=supervision_segments,
|
||||||
|
search_beam=params.search_beam,
|
||||||
|
output_beam=params.output_beam,
|
||||||
|
min_active_states=params.min_active_states,
|
||||||
|
max_active_states=params.max_active_states,
|
||||||
|
subsampling_factor=params.subsampling_factor,
|
||||||
|
)
|
||||||
|
|
||||||
|
logging.info("Use CTC decoding")
|
||||||
|
best_path = one_best_decoding(
|
||||||
|
lattice=lattice, use_double_scores=params.use_double_scores
|
||||||
|
)
|
||||||
|
token_ids = get_texts(best_path)
|
||||||
|
hyps = bpe_model.decode(token_ids)
|
||||||
|
hyps = [s.split() for s in hyps]
|
||||||
|
|
||||||
|
if params.method in [
|
||||||
|
"1best",
|
||||||
|
"whole-lattice-rescoring",
|
||||||
|
"attention-decoder",
|
||||||
|
]:
|
||||||
|
logging.info(f"Loading HLG from {params.lang_dir}/HLG.pt")
|
||||||
|
HLG = k2.Fsa.from_dict(
|
||||||
|
torch.load(params.lang_dir + "/HLG.pt", map_location="cpu")
|
||||||
|
)
|
||||||
|
HLG = HLG.to(device)
|
||||||
|
if not hasattr(HLG, "lm_scores"):
|
||||||
|
# For whole-lattice-rescoring and attention-decoder
|
||||||
|
HLG.lm_scores = HLG.scores.clone()
|
||||||
|
|
||||||
|
if params.method in [
|
||||||
|
"whole-lattice-rescoring",
|
||||||
|
"attention-decoder",
|
||||||
|
]:
|
||||||
|
logging.info(f"Loading G from {params.G}")
|
||||||
|
G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
|
||||||
|
# Add epsilon self-loops to G as we will compose
|
||||||
|
# it with the whole lattice later
|
||||||
|
G = G.to(device)
|
||||||
|
G = k2.add_epsilon_self_loops(G)
|
||||||
|
G = k2.arc_sort(G)
|
||||||
|
G.lm_scores = G.scores.clone()
|
||||||
|
|
||||||
lattice = get_lattice(
|
lattice = get_lattice(
|
||||||
nnet_output=nnet_output,
|
nnet_output=nnet_output,
|
||||||
decoding_graph=HLG,
|
decoding_graph=HLG,
|
||||||
@ -324,7 +371,9 @@ def main():
|
|||||||
)
|
)
|
||||||
best_path = next(iter(best_path_dict.values()))
|
best_path = next(iter(best_path_dict.values()))
|
||||||
elif params.method == "attention-decoder":
|
elif params.method == "attention-decoder":
|
||||||
logging.info("Use HLG + LM rescoring + attention decoder rescoring")
|
logging.info(
|
||||||
|
"Use HLG + LM rescoring + attention decoder rescoring"
|
||||||
|
)
|
||||||
rescored_lattice = rescore_with_whole_lattice(
|
rescored_lattice = rescore_with_whole_lattice(
|
||||||
lattice=lattice, G_with_epsilon_loops=G, lm_scale_list=None
|
lattice=lattice, G_with_epsilon_loops=G, lm_scale_list=None
|
||||||
)
|
)
|
||||||
@ -343,7 +392,9 @@ def main():
|
|||||||
best_path = next(iter(best_path_dict.values()))
|
best_path = next(iter(best_path_dict.values()))
|
||||||
|
|
||||||
hyps = get_texts(best_path)
|
hyps = get_texts(best_path)
|
||||||
word_sym_table = k2.SymbolTable.from_file(params.words_file)
|
word_sym_table = k2.SymbolTable.from_file(
|
||||||
|
params.lang_dir + "/words.txt"
|
||||||
|
)
|
||||||
hyps = [[word_sym_table[i] for i in ids] for ids in hyps]
|
hyps = [[word_sym_table[i] for i in ids] for ids in hyps]
|
||||||
|
|
||||||
s = "\n"
|
s = "\n"
|
||||||
@ -354,6 +405,9 @@ def main():
|
|||||||
|
|
||||||
logging.info("Decoding Done")
|
logging.info("Decoding Done")
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
raise ValueError("Please use a supported decoding method.")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
formatter = (
|
formatter = (
|
||||||
|
Loading…
x
Reference in New Issue
Block a user