diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py index cfb80274a..ca997456f 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/decode.py @@ -17,7 +17,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -When training with the L subset, usage: +When training with the L subset, the offline usage: (1) greedy search ./pruned_transducer_stateless5/decode.py \ --epoch 4 \ @@ -48,6 +48,49 @@ When training with the L subset, usage: --beam 4 \ --max-contexts 4 \ --max-states 8 + +When training with the L subset, the streaming usage: +(1) greedy search +./pruned_transducer_stateless5/decode.py \ + --lang-dir data/lang_char \ + --exp-dir pruned_transducer_stateless5/exp_L_streaming \ + --use-averaged-model True \ + --max-duration 600 \ + --epoch 7 \ + --avg 1 \ + --decoding-method greedy_search \ + --simulate-streaming 1 \ + --causal-convolution 1 \ + --decode-chunk-size 16 \ + --left-context 64 + +(2) modified beam search +./pruned_transducer_stateless5/decode.py \ + --lang-dir data/lang_char \ + --exp-dir pruned_transducer_stateless5/exp_L_streaming \ + --use-averaged-model True \ + --max-duration 600 \ + --epoch 7 \ + --avg 1 \ + --decoding-method modified_beam_search \ + --simulate-streaming 1 \ + --causal-convolution 1 \ + --decode-chunk-size 16 \ + --left-context 64 + +(3) fast beam search +./pruned_transducer_stateless5/decode.py \ + --lang-dir data/lang_char \ + --exp-dir pruned_transducer_stateless5/exp_L_streaming \ + --use-averaged-model True \ + --max-duration 600 \ + --epoch 7 \ + --avg 1 \ + --decoding-method fast_beam_search \ + --simulate-streaming 1 \ + --causal-convolution 1 \ + --decode-chunk-size 16 \ + --left-context 64 """ diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py 
b/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py index 27ffc3bfc..1b064c874 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py @@ -16,26 +16,26 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Usage: +Offline Usage: (1) greedy search -./pruned_transducer_stateless2/pretrained.py \ - --checkpoint ./pruned_transducer_stateless2/exp/pretrained.pt \ +./pruned_transducer_stateless5/pretrained.py \ + --checkpoint ./pruned_transducer_stateless5/exp_L_offline/pretrained.pt \ --lang-dir ./data/lang_char \ --method greedy_search \ --max-sym-per-frame 1 \ /path/to/foo.wav \ /path/to/bar.wav (2) modified beam search -./pruned_transducer_stateless2/pretrained.py \ - --checkpoint ./pruned_transducer_stateless2/exp/pretrained.pt \ +./pruned_transducer_stateless5/pretrained.py \ + --checkpoint ./pruned_transducer_stateless5/exp_L_offline/pretrained.pt \ --lang-dir ./data/lang_char \ --method modified_beam_search \ --beam-size 4 \ /path/to/foo.wav \ /path/to/bar.wav (3) fast beam search -./pruned_transducer_stateless2/pretrained.py \ - --checkpoint ./pruned_transducer_stateless/exp/pretrained.pt \ +./pruned_transducer_stateless5/pretrained.py \ + --checkpoint ./pruned_transducer_stateless5/exp_L_offline/pretrained.pt \ --lang-dir ./data/lang_char \ --method fast_beam_search \ --beam 4 \ @@ -43,9 +43,9 @@ Usage: --max-states 8 \ /path/to/foo.wav \ /path/to/bar.wav -You can also use `./pruned_transducer_stateless2/exp/epoch-xx.pt`. -Note: ./pruned_transducer_stateless2/exp/pretrained.pt is generated by -./pruned_transducer_stateless2/export.py +You can also use `./pruned_transducer_stateless5/exp_L_offline/epoch-xx.pt`. 
+Note: ./pruned_transducer_stateless5/exp_L_offline/pretrained.pt is generated by +./pruned_transducer_stateless5/export.py """ @@ -66,7 +66,7 @@ from beam_search import ( modified_beam_search, ) from torch.nn.utils.rnn import pad_sequence -from train import get_params, get_transducer_model +from train import add_model_arguments, get_params, get_transducer_model from icefall.lexicon import Lexicon @@ -169,6 +169,7 @@ def get_parser(): --method is greedy_search. """, ) + add_model_arguments(parser) return parser diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py index 7c4e89f9d..2a383ca46 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py @@ -17,6 +17,7 @@ """ Usage: +(1) greedy search python pruned_transducer_stateless5/streaming_decode.py \ --epoch 7 \ --avg 1 \ @@ -26,6 +27,28 @@ python pruned_transducer_stateless5/streaming_decode.py \ --exp-dir ./pruned_transducer_stateless5/exp_L_streaming \ --decoding-method greedy_search \ --num-decode-streams 2000 + +(2) modified beam search +python pruned_transducer_stateless5/streaming_decode.py \ + --epoch 7 \ + --avg 1 \ + --decode-chunk-size 16 \ + --left-context 64 \ + --right-context 0 \ + --exp-dir ./pruned_transducer_stateless5/exp_L_streaming \ + --decoding-method modified_beam_search \ + --num-decode-streams 2000 + +(3) fast beam search +python pruned_transducer_stateless5/streaming_decode.py \ + --epoch 7 \ + --avg 1 \ + --decode-chunk-size 16 \ + --left-context 64 \ + --right-context 0 \ + --exp-dir ./pruned_transducer_stateless5/exp_L_streaming \ + --decoding-method fast_beam_search \ + --num-decode-streams 2000 """ import argparse diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/train.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/train.py index cc35e4044..7d09acc39 100755 --- 
a/egs/wenetspeech/ASR/pruned_transducer_stateless5/train.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/train.py @@ -18,67 +18,43 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Usage: - -For training with the L subset: +Usage for offline ASR: export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" -./pruned_transducer_stateles5/train.py \ +./pruned_transducer_stateless5/train.py \ --lang-dir data/lang_char \ - --exp-dir pruned_transducer_stateless5/exp \ + --exp-dir pruned_transducer_stateless5/exp_L_offline \ --world-size 8 \ --num-epochs 15 \ - --start-epoch 0 \ - --max-duration 180 \ + --start-epoch 2 \ + --max-duration 120 \ --valid-interval 3000 \ --model-warm-step 3000 \ --save-every-n 8000 \ + --average-period 1000 \ --training-subset L -# For mix precision training: +Usage for streaming ASR: -./pruned_transducer_stateles/train.py \ +export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" + +./pruned_transducer_stateless5/train.py \ --lang-dir data/lang_char \ - --exp-dir pruned_transducer_stateless5/exp \ + --exp-dir pruned_transducer_stateless5/exp_L_streaming \ --world-size 8 \ - --num-epochs 10 \ - --start-epoch 0 \ - --max-duration 180 \ + --num-epochs 15 \ + --start-epoch 1 \ + --max-duration 140 \ --valid-interval 3000 \ --model-warm-step 3000 \ --save-every-n 8000 \ - --use-fp16 True \ - --training-subset L - -For training with the M subset: - -./pruned_transducer_stateles/train.py \ - --lang-dir data/lang_char \ - --exp-dir pruned_transducer_stateless5/exp \ - --world-size 8 \ - --num-epochs 29 \ - --start-epoch 0 \ - --max-duration 180 \ - --valid-interval 1000 \ - --model-warm-step 500 \ - --save-every-n 1000 \ - --training-subset M - -For training with the S subset: - -./pruned_transducer_stateles/train.py \ - --lang-dir data/lang_char \ - --exp-dir pruned_transducer_stateless5/exp \ - --world-size 8 \ - --num-epochs 29 \ - --start-epoch 0 \ - --max-duration 180 \ - --valid-interval 400 \ - 
--model-warm-step 100 \ - --save-every-n 1000 \ - --training-subset S - + --average-period 1000 \ + --training-subset L \ + --dynamic-chunk-training True \ + --causal-convolution True \ + --short-chunk-size 25 \ + --num-left-chunks 4 """ @@ -1183,7 +1159,6 @@ def scan_pessimistic_batches_for_oom( params: AttributeDict, warmup: float, ): - return from lhotse.dataset import find_pessimistic_batches logging.info(