Add timit recipe for icefall

Author: Mingshuang Luo
Date: 2021-10-28 13:54:08 +08:00
Parent: 69c87203ca
Commit: e023a9df98
18 changed files with 7317 additions and 0 deletions

egs/timit/ASR/RESULTS.md (new file, 192 lines)
@@ -0,0 +1,192 @@
# Results
In this recipe, we use phones as the modeling unit, so the PER is equal to the WER.
command: CUDA_VISIBLE_DEVICES='0' python tdnn_lstm_ctc/decode.py --epoch=59 --avg=1
2021-10-28 13:14:51,693 INFO [decode.py:387] Decoding started
2021-10-28 13:14:51,693 INFO [decode.py:388] {'exp_dir': PosixPath('tdnn_lstm_ctc/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 80, 'subsampling_factor': 3, 'search_beam': 20, 'output_beam': 5, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 59, 'avg': 1, 'method': 'whole-lattice-rescoring', 'num_paths': 100, 'nbest_scale': 0.5, 'export': False, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 200.0, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
2021-10-28 13:14:51,733 INFO [lexicon.py:176] Loading pre-compiled data/lang_phone/Linv.pt
2021-10-28 13:14:51,910 INFO [decode.py:397] device: cuda:0
2021-10-28 13:14:58,958 INFO [decode.py:427] Loading pre-compiled G_4_gram.pt
2021-10-28 13:14:59,236 INFO [checkpoint.py:92] Loading checkpoint from tdnn_lstm_ctc/exp/epoch-59.pt
2021-10-28 13:15:01,789 INFO [decode.py:336] batch 0/?, cuts processed until now is 63
2021-10-28 13:15:03,065 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.1.txt
2021-10-28 13:15:03,085 INFO [utils.py:469] [TEST-lm_scale_0.1] %WER 21.47% [1549 / 7215, 169 ins, 466 del, 914 sub ]
2021-10-28 13:15:03,118 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.1.txt
2021-10-28 13:15:03,146 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.2.txt
2021-10-28 13:15:03,166 INFO [utils.py:469] [TEST-lm_scale_0.2] %WER 21.26% [1534 / 7215, 150 ins, 490 del, 894 sub ]
2021-10-28 13:15:03,198 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.2.txt
2021-10-28 13:15:03,226 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.3.txt
2021-10-28 13:15:03,246 INFO [utils.py:469] [TEST-lm_scale_0.3] %WER 21.41% [1545 / 7215, 138 ins, 521 del, 886 sub ]
2021-10-28 13:15:03,279 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.3.txt
2021-10-28 13:15:03,307 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.4.txt
2021-10-28 13:15:03,327 INFO [utils.py:469] [TEST-lm_scale_0.4] %WER 21.73% [1568 / 7215, 127 ins, 566 del, 875 sub ]
2021-10-28 13:15:03,365 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.4.txt
2021-10-28 13:15:03,393 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.5.txt
2021-10-28 13:15:03,413 INFO [utils.py:469] [TEST-lm_scale_0.5] %WER 22.16% [1599 / 7215, 114 ins, 607 del, 878 sub ]
2021-10-28 13:15:03,445 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.5.txt
2021-10-28 13:15:03,474 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.6.txt
2021-10-28 13:15:03,494 INFO [utils.py:469] [TEST-lm_scale_0.6] %WER 22.76% [1642 / 7215, 109 ins, 638 del, 895 sub ]
2021-10-28 13:15:03,526 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.6.txt
2021-10-28 13:15:03,554 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.7.txt
2021-10-28 13:15:03,574 INFO [utils.py:469] [TEST-lm_scale_0.7] %WER 23.27% [1679 / 7215, 100 ins, 689 del, 890 sub ]
2021-10-28 13:15:03,611 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.7.txt
2021-10-28 13:15:03,639 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.8.txt
2021-10-28 13:15:03,660 INFO [utils.py:469] [TEST-lm_scale_0.8] %WER 24.21% [1747 / 7215, 96 ins, 745 del, 906 sub ]
2021-10-28 13:15:03,699 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.8.txt
2021-10-28 13:15:03,727 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.9.txt
2021-10-28 13:15:03,747 INFO [utils.py:469] [TEST-lm_scale_0.9] %WER 24.99% [1803 / 7215, 95 ins, 796 del, 912 sub ]
2021-10-28 13:15:03,783 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.9.txt
2021-10-28 13:15:03,811 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.0.txt
2021-10-28 13:15:03,830 INFO [utils.py:469] [TEST-lm_scale_1.0] %WER 25.61% [1848 / 7215, 92 ins, 844 del, 912 sub ]
2021-10-28 13:15:03,863 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.0.txt
2021-10-28 13:15:03,890 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.1.txt
2021-10-28 13:15:03,910 INFO [utils.py:469] [TEST-lm_scale_1.1] %WER 26.54% [1915 / 7215, 81 ins, 923 del, 911 sub ]
2021-10-28 13:15:03,943 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.1.txt
2021-10-28 13:15:03,971 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.2.txt
2021-10-28 13:15:03,991 INFO [utils.py:469] [TEST-lm_scale_1.2] %WER 27.50% [1984 / 7215, 76 ins, 986 del, 922 sub ]
2021-10-28 13:15:04,023 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.2.txt
2021-10-28 13:15:04,051 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.3.txt
2021-10-28 13:15:04,070 INFO [utils.py:469] [TEST-lm_scale_1.3] %WER 28.26% [2039 / 7215, 69 ins, 1046 del, 924 sub ]
2021-10-28 13:15:04,102 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.3.txt
2021-10-28 13:15:04,130 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.4.txt
2021-10-28 13:15:04,150 INFO [utils.py:469] [TEST-lm_scale_1.4] %WER 28.79% [2077 / 7215, 63 ins, 1100 del, 914 sub ]
2021-10-28 13:15:04,183 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.4.txt
2021-10-28 13:15:04,211 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.5.txt
2021-10-28 13:15:04,231 INFO [utils.py:469] [TEST-lm_scale_1.5] %WER 29.72% [2144 / 7215, 56 ins, 1178 del, 910 sub ]
2021-10-28 13:15:04,263 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.5.txt
2021-10-28 13:15:04,291 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.6.txt
2021-10-28 13:15:04,311 INFO [utils.py:469] [TEST-lm_scale_1.6] %WER 30.51% [2201 / 7215, 50 ins, 1250 del, 901 sub ]
2021-10-28 13:15:04,343 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.6.txt
2021-10-28 13:15:04,371 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.7.txt
2021-10-28 13:15:04,391 INFO [utils.py:469] [TEST-lm_scale_1.7] %WER 31.30% [2258 / 7215, 44 ins, 1317 del, 897 sub ]
2021-10-28 13:15:04,423 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.7.txt
2021-10-28 13:15:04,451 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.8.txt
2021-10-28 13:15:04,470 INFO [utils.py:469] [TEST-lm_scale_1.8] %WER 32.22% [2325 / 7215, 45 ins, 1374 del, 906 sub ]
2021-10-28 13:15:04,503 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.8.txt
2021-10-28 13:15:04,531 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.9.txt
2021-10-28 13:15:04,550 INFO [utils.py:469] [TEST-lm_scale_1.9] %WER 33.17% [2393 / 7215, 43 ins, 1444 del, 906 sub ]
2021-10-28 13:15:04,582 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.9.txt
2021-10-28 13:15:04,610 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_2.0.txt
2021-10-28 13:15:04,630 INFO [utils.py:469] [TEST-lm_scale_2.0] %WER 34.03% [2455 / 7215, 41 ins, 1510 del, 904 sub ]
2021-10-28 13:15:04,662 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_2.0.txt
2021-10-28 13:15:04,682 INFO [decode.py:374]
For TEST, PER of different settings are:
lm_scale_0.2 21.26 best for TEST
lm_scale_0.3 21.41
lm_scale_0.1 21.47
lm_scale_0.4 21.73
lm_scale_0.5 22.16
lm_scale_0.6 22.76
lm_scale_0.7 23.27
lm_scale_0.8 24.21
lm_scale_0.9 24.99
lm_scale_1.0 25.61
lm_scale_1.1 26.54
lm_scale_1.2 27.5
lm_scale_1.3 28.26
lm_scale_1.4 28.79
lm_scale_1.5 29.72
lm_scale_1.6 30.51
lm_scale_1.7 31.3
lm_scale_1.8 32.22
lm_scale_1.9 33.17
lm_scale_2.0 34.03
2021-10-28 13:15:04,682 INFO [decode.py:498] Done!
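(Editor's aside, not part of the recipe output.) Each per-scale line above follows a fixed "[TEST-lm_scale_X] %WER ..." pattern, so the numbers can be pulled out programmatically; a minimal sketch, assuming only that the log format stays as shown:

# Hedged sketch: parse one "%WER" summary line from the log above.
import re

line = ("2021-10-28 13:15:03,085 INFO [utils.py:469] [TEST-lm_scale_0.1] "
        "%WER 21.47% [1549 / 7215, 169 ins, 466 del, 914 sub ]")
m = re.search(
    r"\[(?P<name>TEST-[^\]]+)\] %WER (?P<per>[\d.]+)% "
    r"\[(?P<errs>\d+) / (?P<total>\d+), (?P<ins>\d+) ins, (?P<dels>\d+) del, (?P<subs>\d+) sub",
    line,
)
assert m is not None
print(m.group("name"), m.group("per"), m.group("ins"), m.group("dels"), m.group("subs"))
# -> TEST-lm_scale_0.1 21.47 169 466 914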
command: CUDA_VISIBLE_DEVICES='0' python tdnn_lstm_ctc/decode.py --epoch=59 --avg=5
2021-10-28 13:20:28,962 INFO [decode.py:387] Decoding started
2021-10-28 13:20:28,962 INFO [decode.py:388] {'exp_dir': PosixPath('tdnn_lstm_ctc/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 80, 'subsampling_factor': 3, 'search_beam': 20, 'output_beam': 5, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 59, 'avg': 5, 'method': 'whole-lattice-rescoring', 'num_paths': 100, 'nbest_scale': 0.5, 'export': False, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 200.0, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
2021-10-28 13:20:29,002 INFO [lexicon.py:176] Loading pre-compiled data/lang_phone/Linv.pt
2021-10-28 13:20:29,153 INFO [decode.py:397] device: cuda:0
2021-10-28 13:20:35,947 INFO [decode.py:427] Loading pre-compiled G_4_gram.pt
2021-10-28 13:20:36,097 INFO [decode.py:458] averaging ['tdnn_lstm_ctc/exp/epoch-55.pt', 'tdnn_lstm_ctc/exp/epoch-56.pt', 'tdnn_lstm_ctc/exp/epoch-57.pt', 'tdnn_lstm_ctc/exp/epoch-58.pt', 'tdnn_lstm_ctc/exp/epoch-59.pt']
2021-10-28 13:20:39,819 INFO [decode.py:336] batch 0/?, cuts processed until now is 63
2021-10-28 13:20:41,218 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.1.txt
2021-10-28 13:20:41,239 INFO [utils.py:469] [TEST-lm_scale_0.1] %WER 20.82% [1502 / 7215, 144 ins, 478 del, 880 sub ]
2021-10-28 13:20:41,279 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.1.txt
2021-10-28 13:20:41,307 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.2.txt
2021-10-28 13:20:41,327 INFO [utils.py:469] [TEST-lm_scale_0.2] %WER 20.93% [1510 / 7215, 134 ins, 504 del, 872 sub ]
2021-10-28 13:20:41,365 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.2.txt
2021-10-28 13:20:41,395 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.3.txt
2021-10-28 13:20:41,415 INFO [utils.py:469] [TEST-lm_scale_0.3] %WER 21.33% [1539 / 7215, 122 ins, 541 del, 876 sub ]
2021-10-28 13:20:41,447 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.3.txt
2021-10-28 13:20:41,476 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.4.txt
2021-10-28 13:20:41,498 INFO [utils.py:469] [TEST-lm_scale_0.4] %WER 21.91% [1581 / 7215, 119 ins, 587 del, 875 sub ]
2021-10-28 13:20:41,530 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.4.txt
2021-10-28 13:20:41,563 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.5.txt
2021-10-28 13:20:41,591 INFO [utils.py:469] [TEST-lm_scale_0.5] %WER 22.58% [1629 / 7215, 116 ins, 636 del, 877 sub ]
2021-10-28 13:20:41,624 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.5.txt
2021-10-28 13:20:41,652 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.6.txt
2021-10-28 13:20:41,679 INFO [utils.py:469] [TEST-lm_scale_0.6] %WER 23.20% [1674 / 7215, 106 ins, 682 del, 886 sub ]
2021-10-28 13:20:41,712 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.6.txt
2021-10-28 13:20:41,740 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.7.txt
2021-10-28 13:20:41,768 INFO [utils.py:469] [TEST-lm_scale_0.7] %WER 23.76% [1714 / 7215, 92 ins, 738 del, 884 sub ]
2021-10-28 13:20:41,802 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.7.txt
2021-10-28 13:20:41,830 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.8.txt
2021-10-28 13:20:41,851 INFO [utils.py:469] [TEST-lm_scale_0.8] %WER 24.46% [1765 / 7215, 90 ins, 796 del, 879 sub ]
2021-10-28 13:20:41,892 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.8.txt
2021-10-28 13:20:41,920 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_0.9.txt
2021-10-28 13:20:41,940 INFO [utils.py:469] [TEST-lm_scale_0.9] %WER 25.16% [1815 / 7215, 81 ins, 843 del, 891 sub ]
2021-10-28 13:20:41,976 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_0.9.txt
2021-10-28 13:20:42,004 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.0.txt
2021-10-28 13:20:42,024 INFO [utils.py:469] [TEST-lm_scale_1.0] %WER 25.84% [1864 / 7215, 73 ins, 892 del, 899 sub ]
2021-10-28 13:20:42,067 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.0.txt
2021-10-28 13:20:42,099 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.1.txt
2021-10-28 13:20:42,119 INFO [utils.py:469] [TEST-lm_scale_1.1] %WER 26.46% [1909 / 7215, 69 ins, 932 del, 908 sub ]
2021-10-28 13:20:42,152 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.1.txt
2021-10-28 13:20:42,184 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.2.txt
2021-10-28 13:20:42,204 INFO [utils.py:469] [TEST-lm_scale_1.2] %WER 27.23% [1965 / 7215, 66 ins, 989 del, 910 sub ]
2021-10-28 13:20:42,241 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.2.txt
2021-10-28 13:20:42,280 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.3.txt
2021-10-28 13:20:42,300 INFO [utils.py:469] [TEST-lm_scale_1.3] %WER 28.01% [2021 / 7215, 60 ins, 1055 del, 906 sub ]
2021-10-28 13:20:42,332 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.3.txt
2021-10-28 13:20:42,360 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.4.txt
2021-10-28 13:20:42,386 INFO [utils.py:469] [TEST-lm_scale_1.4] %WER 29.04% [2095 / 7215, 54 ins, 1134 del, 907 sub ]
2021-10-28 13:20:42,425 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.4.txt
2021-10-28 13:20:42,454 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.5.txt
2021-10-28 13:20:42,477 INFO [utils.py:469] [TEST-lm_scale_1.5] %WER 30.08% [2170 / 7215, 48 ins, 1222 del, 900 sub ]
2021-10-28 13:20:42,516 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.5.txt
2021-10-28 13:20:42,544 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.6.txt
2021-10-28 13:20:42,567 INFO [utils.py:469] [TEST-lm_scale_1.6] %WER 31.02% [2238 / 7215, 41 ins, 1285 del, 912 sub ]
2021-10-28 13:20:42,602 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.6.txt
2021-10-28 13:20:42,630 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.7.txt
2021-10-28 13:20:42,650 INFO [utils.py:469] [TEST-lm_scale_1.7] %WER 31.73% [2289 / 7215, 40 ins, 1336 del, 913 sub ]
2021-10-28 13:20:42,692 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.7.txt
2021-10-28 13:20:42,720 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.8.txt
2021-10-28 13:20:42,740 INFO [utils.py:469] [TEST-lm_scale_1.8] %WER 32.52% [2346 / 7215, 39 ins, 1407 del, 900 sub ]
2021-10-28 13:20:42,780 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.8.txt
2021-10-28 13:20:42,808 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_1.9.txt
2021-10-28 13:20:42,828 INFO [utils.py:469] [TEST-lm_scale_1.9] %WER 33.35% [2406 / 7215, 40 ins, 1460 del, 906 sub ]
2021-10-28 13:20:42,865 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_1.9.txt
2021-10-28 13:20:42,899 INFO [decode.py:351] The transcripts are stored in tdnn_lstm_ctc/exp/recogs-TEST-lm_scale_2.0.txt
2021-10-28 13:20:42,919 INFO [utils.py:469] [TEST-lm_scale_2.0] %WER 33.97% [2451 / 7215, 39 ins, 1510 del, 902 sub ]
2021-10-28 13:20:42,952 INFO [decode.py:360] Wrote detailed error stats to tdnn_lstm_ctc/exp/errs-TEST-lm_scale_2.0.txt
2021-10-28 13:20:42,986 INFO [decode.py:374]
For TEST, PER of different settings are:
lm_scale_0.1 20.82 best for TEST
lm_scale_0.2 20.93
lm_scale_0.3 21.33
lm_scale_0.4 21.91
lm_scale_0.5 22.58
lm_scale_0.6 23.2
lm_scale_0.7 23.76
lm_scale_0.8 24.46
lm_scale_0.9 25.16
lm_scale_1.0 25.84
lm_scale_1.1 26.46
lm_scale_1.2 27.23
lm_scale_1.3 28.01
lm_scale_1.4 29.04
lm_scale_1.5 30.08
lm_scale_1.6 31.02
lm_scale_1.7 31.73
lm_scale_1.8 32.52
lm_scale_1.9 33.35
lm_scale_2.0 33.97
2021-10-28 13:20:42,986 INFO [decode.py:498] Done!
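(Editor's aside, not part of the recipe output.) Both runs sweep lm_scale from 0.1 to 2.0 and keep the setting with the lowest PER; the selection step itself is just a minimum over the summary, sketched here with a hand-copied subset of the --avg=5 numbers above:

# Hedged sketch: pick the lm_scale with the lowest PER from a summary like the ones above.
summary = {"lm_scale_0.1": 20.82, "lm_scale_0.2": 20.93, "lm_scale_0.3": 21.33}
best_setting, best_per = min(summary.items(), key=lambda kv: kv[1])
print(best_setting, best_per, "best for TEST")  # -> lm_scale_0.1 20.82 best for TEST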

@@ -0,0 +1,166 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script takes as input lang_dir and generates HLG from
- H, the ctc topology, built from tokens contained in lang_dir/lexicon.txt
- L, the lexicon, built from lang_dir/L_disambig.pt
Caution: We use a lexicon that contains disambiguation symbols
- G, the LM, built from data/lm/G_3_gram.fst.txt
The generated HLG is saved in $lang_dir/HLG.pt
"""
import argparse
import logging
from pathlib import Path
import k2
import torch
from icefall.lexicon import Lexicon
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--lang-dir",
type=str,
help="""Input and output directory.
""",
)
return parser.parse_args()
def compile_HLG(lang_dir: str) -> k2.Fsa:
"""
Args:
lang_dir:
The language directory, e.g., data/lang_phone or data/lang_bpe_5000.
Return:
An FSA representing HLG.
"""
lexicon = Lexicon(lang_dir)
max_token_id = max(lexicon.tokens)
logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
H = k2.ctc_topo(max_token_id)
if Path(lang_dir / "L_disambig.pt").is_file():
logging.info("Loading L_disambig.pt")
d = torch.load(Path(lang_dir/"L_disambig.pt"))
L = k2.Fsa.from_dict(d)
else:
logging.info("Loading L_disambig.fst.txt")
with open(Path(lang_dir/"L_disambig.fst.txt")) as f:
L = k2.Fsa.from_openfst(f.read(), acceptor=False)
            torch.save(L.as_dict(), Path(lang_dir / "L_disambig.pt"))
#L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
if Path("data/lm/G.pt").is_file():
logging.info("Loading pre-compiled G")
d = torch.load("data/lm/G.pt")
G = k2.Fsa.from_dict(d)
else:
logging.info("Loading G_3_gram.fst.txt")
with open("data/lm/G_3_gram.fst.txt") as f:
G = k2.Fsa.from_openfst(f.read(), acceptor=False)
torch.save(G.as_dict(), "data/lm/G.pt")
first_token_disambig_id = lexicon.token_table["#0"]
first_word_disambig_id = lexicon.word_table["#0"]
L = k2.arc_sort(L)
G = k2.arc_sort(G)
logging.info("Intersecting L and G")
LG = k2.compose(L, G)
logging.info(f"LG shape: {LG.shape}")
logging.info("Connecting LG")
LG = k2.connect(LG)
logging.info(f"LG shape after k2.connect: {LG.shape}")
logging.info(type(LG.aux_labels))
logging.info("Determinizing LG")
LG = k2.determinize(LG)
logging.info(type(LG.aux_labels))
logging.info("Connecting LG after k2.determinize")
LG = k2.connect(LG)
logging.info("Removing disambiguation symbols on LG")
LG.labels[LG.labels >= first_token_disambig_id] = 0
LG.aux_labels.values[LG.aux_labels.values >= first_word_disambig_id] = 0
LG = k2.remove_epsilon(LG)
logging.info(f"LG shape after k2.remove_epsilon: {LG.shape}")
LG = k2.connect(LG)
LG.aux_labels = LG.aux_labels.remove_values_eq(0)
logging.info("Arc sorting LG")
LG = k2.arc_sort(LG)
logging.info("Composing H and LG")
# CAUTION: The name of the inner_labels is fixed
# to `tokens`. If you want to change it, please
# also change other places in icefall that are using
# it.
HLG = k2.compose(H, LG, inner_labels="tokens")
logging.info("Connecting LG")
HLG = k2.connect(HLG)
logging.info("Arc sorting LG")
HLG = k2.arc_sort(HLG)
logging.info(f"HLG.shape: {HLG.shape}")
return HLG
def main():
args = get_args()
lang_dir = Path(args.lang_dir)
if (lang_dir / "HLG.pt").is_file():
logging.info(f"{lang_dir}/HLG.pt already exists - skipping")
return
logging.info(f"Processing {lang_dir}")
HLG = compile_HLG(lang_dir)
logging.info(f"Saving HLG.pt to {lang_dir}")
torch.save(HLG.as_dict(), f"{lang_dir}/HLG.pt")
if __name__ == "__main__":
formatter = (
"%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
)
logging.basicConfig(format=formatter, level=logging.INFO)
main()
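Editor's note: the file path for this script is not shown in this extract; in icefall recipes a script of this kind is usually invoked as `python local/compile_hlg.py --lang-dir data/lang_phone` (an assumption, not confirmed by the diff). Once it has run, the saved HLG can be reloaded for a quick sanity check; a minimal sketch, assuming the default lang_dir above:

# Hedged sketch: reload the HLG.pt produced by the script above and print basic stats.
import k2
import torch

d = torch.load("data/lang_phone/HLG.pt")  # assumes --lang-dir data/lang_phone
HLG = k2.Fsa.from_dict(d)
print(HLG.shape, HLG.num_arcs)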

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file computes fbank features of the musan dataset.
It looks for manifests in the directory data/manifests.
The generated fbank features are saved in data/fbank.
"""
import logging
import os
from pathlib import Path
import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomHdf5Writer, combine
from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor
# Torch's multithreaded behavior needs to be disabled or
# it wastes a lot of CPU and slows things down.
# Do this outside of main() in case it needs to take effect
# even when we are not invoking the main (e.g. when spawning subprocesses).
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
def compute_fbank_musan():
src_dir = Path("data/manifests")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
num_mel_bins = 80
dataset_parts = (
"music",
"speech",
"noise",
)
manifests = read_manifests_if_cached(
dataset_parts=dataset_parts, output_dir=src_dir
)
assert manifests is not None
musan_cuts_path = output_dir / "cuts_musan.json.gz"
if musan_cuts_path.is_file():
logging.info(f"{musan_cuts_path} already exists - skipping")
return
logging.info("Extracting features for Musan")
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once.
# create chunks of Musan with duration 5 - 10 seconds
musan_cuts = (
CutSet.from_manifests(
recordings=combine(
part["recordings"] for part in manifests.values()
)
)
.cut_into_windows(10.0)
.filter(lambda c: c.duration > 5)
.compute_and_store_features(
extractor=extractor,
storage_path=f"{output_dir}/feats_musan",
num_jobs=num_jobs if ex is None else 80,
executor=ex,
storage_type=LilcomHdf5Writer,
)
)
musan_cuts.to_json(musan_cuts_path)
if __name__ == "__main__":
formatter = (
"%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
)
logging.basicConfig(format=formatter, level=logging.INFO)
compute_fbank_musan()
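Editor's note: a quick check one might run after this script finishes, using the output paths above; the lhotse calls are a sketch based on the API these scripts already import, not part of the recipe:

# Hedged sketch: confirm the MUSAN cut manifest written above can be read back.
from lhotse import CutSet

cuts = CutSet.from_json("data/fbank/cuts_musan.json.gz")
print(len(cuts), "MUSAN cuts,", round(sum(c.duration for c in cuts), 1), "seconds in total")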

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang
# Mingshuang Luo)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This file computes fbank features of the TIMIT dataset.
It looks for manifests in the directory data/manifests.
The generated fbank features are saved in data/fbank.
"""
import logging
import os
from pathlib import Path
import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomHdf5Writer
from lhotse.recipes.utils import read_manifests_if_cached
from icefall.utils import get_executor
# Torch's multithreaded behavior needs to be disabled or
# it wastes a lot of CPU and slows things down.
# Do this outside of main() in case it needs to take effect
# even when we are not invoking the main (e.g. when spawning subprocesses).
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
def compute_fbank_timit():
src_dir = Path("data/manifests")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
num_mel_bins = 80
dataset_parts = (
"TRAIN",
"DEV",
"TEST",
)
manifests = read_manifests_if_cached(
dataset_parts=dataset_parts, output_dir=src_dir
)
assert manifests is not None
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
if (output_dir / f"cuts_{partition}.json.gz").is_file():
logging.info(f"{partition} already exists - skipping.")
continue
logging.info(f"Processing {partition}")
cut_set = CutSet.from_manifests(
recordings=m["recordings"],
supervisions=m["supervisions"],
)
if "train" in partition:
cut_set = (
cut_set
+ cut_set.perturb_speed(0.9)
+ cut_set.perturb_speed(1.1)
)
cut_set = cut_set.compute_and_store_features(
extractor=extractor,
storage_path=f"{output_dir}/feats_{partition}",
# when an executor is specified, make more partitions
num_jobs=num_jobs if ex is None else 80,
executor=ex,
storage_type=LilcomHdf5Writer,
)
cut_set.to_json(output_dir / f"cuts_{partition}.json.gz")
if __name__ == "__main__":
formatter = (
"%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
)
logging.basicConfig(format=formatter, level=logging.INFO)
compute_fbank_timit()
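Editor's note: a similar sanity check for the TIMIT features, again a sketch that only assumes the output paths used above and the 80-dim fbank configuration:

# Hedged sketch: load one TIMIT cut and check its fbank features.
from lhotse import CutSet

cuts = CutSet.from_json("data/fbank/cuts_TRAIN.json.gz")
cut = next(iter(cuts))
feats = cut.load_features()
print(cut.id, feats.shape)  # expected second dimension: 80 mel bins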

@@ -0,0 +1,394 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script takes as input a lexicon file "data/lang_phone/lexicon.txt"
consisting of words and tokens (i.e., phones) and does the following:
1. Add disambiguation symbols to the lexicon and generate lexicon_disambig.txt
2. Generate tokens.txt, the token table mapping a token to a unique integer.
3. Generate words.txt, the word table mapping a word to a unique integer.
4. Generate L.pt, in k2 format. It can be loaded by
d = torch.load("L.pt")
lexicon = k2.Fsa.from_dict(d)
5. Generate L_disambig.pt, in k2 format.
"""
import argparse
import math
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List, Tuple
import k2
import torch
from icefall.lexicon import read_lexicon, write_lexicon
from icefall.utils import str2bool
Lexicon = List[Tuple[str, List[str]]]
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--lang-dir",
type=str,
help="""Input and output directory.
It should contain a file lexicon.txt.
Generated files by this script are saved into this directory.
""",
)
parser.add_argument(
"--debug",
type=str2bool,
default=False,
help="""True for debugging, which will generate
a visualization of the lexicon FST.
Caution: If your lexicon contains hundreds of thousands
of lines, please set it to False!
""",
)
return parser.parse_args()
def write_mapping(filename: str, sym2id: Dict[str, int]) -> None:
"""Write a symbol to ID mapping to a file.
Note:
No need to implement `read_mapping` as it can be done
through :func:`k2.SymbolTable.from_file`.
Args:
filename:
Filename to save the mapping.
sym2id:
A dict mapping symbols to IDs.
Returns:
Return None.
"""
with open(filename, "w", encoding="utf-8") as f:
for sym, i in sym2id.items():
f.write(f"{sym} {i}\n")
def get_tokens(lexicon: Lexicon) -> List[str]:
"""Get tokens from a lexicon.
Args:
lexicon:
It is the return value of :func:`read_lexicon`.
Returns:
Return a list of unique tokens.
"""
ans = set()
for _, tokens in lexicon:
ans.update(tokens)
#sorted_ans = sorted(list(ans))
sorted_ans = list(ans)
return sorted_ans
def get_words(lexicon: Lexicon) -> List[str]:
"""Get words from a lexicon.
Args:
lexicon:
It is the return value of :func:`read_lexicon`.
Returns:
Return a list of unique words.
"""
ans = set()
for word, _ in lexicon:
ans.add(word)
sorted_ans = sorted(list(ans))
return sorted_ans
def add_disambig_symbols(lexicon: Lexicon) -> Tuple[Lexicon, int]:
"""It adds pseudo-token disambiguation symbols #1, #2 and so on
at the ends of tokens to ensure that all pronunciations are different,
and that none is a prefix of another.
See also add_lex_disambig.pl from kaldi.
Args:
lexicon:
It is returned by :func:`read_lexicon`.
Returns:
Return a tuple with two elements:
- The output lexicon with disambiguation symbols
- The ID of the max disambiguation symbol that appears
in the lexicon
"""
# (1) Work out the count of each token-sequence in the
# lexicon.
count = defaultdict(int)
for _, tokens in lexicon:
count[" ".join(tokens)] += 1
# (2) For each left sub-sequence of each token-sequence, note down
# that it exists (for identifying prefixes of longer strings).
issubseq = defaultdict(int)
for _, tokens in lexicon:
tokens = tokens.copy()
tokens.pop()
while tokens:
issubseq[" ".join(tokens)] = 1
tokens.pop()
# (3) For each entry in the lexicon:
# if the token sequence is unique and is not a
# prefix of another word, no disambig symbol.
# Else output #1, or #2, #3, ... if the same token-seq
# has already been assigned a disambig symbol.
ans = []
# We start with #1 since #0 has its own purpose
first_allowed_disambig = 1
max_disambig = first_allowed_disambig - 1
last_used_disambig_symbol_of = defaultdict(int)
for word, tokens in lexicon:
tokenseq = " ".join(tokens)
assert tokenseq != ""
if issubseq[tokenseq] == 0 and count[tokenseq] == 1:
ans.append((word, tokens))
continue
cur_disambig = last_used_disambig_symbol_of[tokenseq]
if cur_disambig == 0:
cur_disambig = first_allowed_disambig
else:
cur_disambig += 1
if cur_disambig > max_disambig:
max_disambig = cur_disambig
last_used_disambig_symbol_of[tokenseq] = cur_disambig
tokenseq += f" #{cur_disambig}"
ans.append((word, tokenseq.split()))
return ans, max_disambig
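# Worked example (editor's comment, toy lexicon not taken from TIMIT):
#     read     r eh d
#     red      r eh d
#     bed      b eh d
#     bedroom  b eh d r uw m
# "r eh d" occurs twice, so the entries become "read r eh d #1" and "red r eh d #2";
# "b eh d" is a prefix of "b eh d r uw m", so "bed" becomes "bed b eh d #1";
# "b eh d r uw m" is unique and not a prefix of anything, so it stays unchanged.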
def generate_id_map(symbols: List[str]) -> Dict[str, int]:
"""Generate ID maps, i.e., map a symbol to a unique ID.
Args:
symbols:
A list of unique symbols.
Returns:
A dict containing the mapping between symbols and IDs.
"""
return {sym: i for i, sym in enumerate(symbols)}
def add_self_loops(
arcs: List[List[Any]], disambig_token: int, disambig_word: int
) -> List[List[Any]]:
"""Adds self-loops to states of an FST to propagate disambiguation symbols
through it. They are added on each state with non-epsilon output symbols
on at least one arc out of the state.
See also fstaddselfloops.pl from Kaldi. One difference is that
Kaldi uses OpenFst style FSTs and it has multiple final states.
This function uses k2 style FSTs and it does not need to add self-loops
to the final state.
The input label of a self-loop is `disambig_token`, while the output
label is `disambig_word`.
Args:
arcs:
A list-of-list. The sublist contains
`[src_state, dest_state, label, aux_label, score]`
disambig_token:
It is the token ID of the symbol `#0`.
disambig_word:
It is the word ID of the symbol `#0`.
Return:
Return new `arcs` containing self-loops.
"""
states_needs_self_loops = set()
for arc in arcs:
src, dst, ilabel, olabel, score = arc
if olabel != 0:
states_needs_self_loops.add(src)
ans = []
for s in states_needs_self_loops:
ans.append([s, s, disambig_token, disambig_word, 0])
return arcs + ans
def lexicon_to_fst(
lexicon: Lexicon,
token2id: Dict[str, int],
word2id: Dict[str, int],
need_self_loops: bool = False,
) -> k2.Fsa:
"""Convert a lexicon to an FST (in k2 format) with optional silence at
the beginning and end of each word.
Args:
lexicon:
The input lexicon. See also :func:`read_lexicon`
token2id:
A dict mapping tokens to IDs.
word2id:
A dict mapping words to IDs.
need_self_loops:
If True, add self-loop to states with non-epsilon output symbols
on at least one arc out of the state. The input label for this
self loop is `token2id["#0"]` and the output label is `word2id["#0"]`.
Returns:
Return an instance of `k2.Fsa` representing the given lexicon.
"""
pronprob = 1.0
score = -math.log(pronprob)
loop_state = 0 # words enter and leave from here
next_state = 1 # the next un-allocated state, will be incremented as we go.
arcs = []
    assert token2id["<eps>"] == 0
    assert word2id["<eps>"] == 0
    eps = 0
for word, tokens in lexicon:
assert len(tokens) > 0, f"{word} has no pronunciations"
cur_state = loop_state
word = word2id[word]
tokens = [token2id[i] for i in tokens]
for i in range(len(tokens) - 1):
w = word if i == 0 else eps
arcs.append([cur_state, next_state, tokens[i], w, score])
cur_state = next_state
next_state += 1
        # now for the last token of this word: its arc goes back to the loop state
        i = len(tokens) - 1
        w = word if i == 0 else eps
        arcs.append([cur_state, loop_state, tokens[i], w, score])
if need_self_loops:
disambig_token = token2id["#0"]
disambig_word = word2id["#0"]
arcs = add_self_loops(
arcs,
disambig_token=disambig_token,
disambig_word=disambig_word,
)
final_state = next_state
arcs.append([loop_state, final_state, -1, -1, 0])
arcs.append([final_state])
arcs = sorted(arcs, key=lambda arc: arc[0])
arcs = [[str(i) for i in arc] for arc in arcs]
arcs = [" ".join(arc) for arc in arcs]
arcs = "\n".join(arcs)
fsa = k2.Fsa.from_str(arcs, acceptor=False)
return fsa
def main():
args = get_args()
lang_dir = Path(args.lang_dir)
#out_dir = Path("data/lang_phone")
lexicon_filename = lang_dir / "lexicon.txt"
lexicon = read_lexicon(lexicon_filename)
tokens = get_tokens(lexicon)
words = get_words(lexicon)
lexicon_disambig, max_disambig = add_disambig_symbols(lexicon)
for i in range(max_disambig + 1):
disambig = f"#{i}"
assert disambig not in tokens
tokens.append(f"#{i}")
assert "<eps>" not in tokens
tokens = ["<eps>"] + tokens
assert "<eps>" not in words
assert "#0" not in words
assert "<s>" not in words
assert "</s>" not in words
words = ["<eps>"] + words + ["#0", "<s>", "</s>"]
token2id = generate_id_map(tokens)
word2id = generate_id_map(words)
write_mapping(lang_dir / "tokens.txt", token2id)
write_mapping(lang_dir / "words.txt", word2id)
write_lexicon(lang_dir / "lexicon_disambig.txt", lexicon_disambig)
L = lexicon_to_fst(
lexicon,
token2id=token2id,
word2id=word2id,
)
L_disambig = lexicon_to_fst(
lexicon_disambig,
token2id=token2id,
word2id=word2id,
need_self_loops=True,
)
torch.save(L.as_dict(), lang_dir / "L.pt")
torch.save(L_disambig.as_dict(), lang_dir / "L_disambig.pt")
if False:
# Just for debugging, will remove it
L.labels_sym = k2.SymbolTable.from_file(lang_dir / "tokens.txt")
L.aux_labels_sym = k2.SymbolTable.from_file(lang_dir / "words.txt")
L_disambig.labels_sym = L.labels_sym
L_disambig.aux_labels_sym = L.aux_labels_sym
        L.draw(lang_dir / "L.png", title="L")
L_disambig.draw(lang_dir / "L_disambig.png", title="L_disambig")
if __name__ == "__main__":
main()
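Editor's note: after running this script with, e.g., `--lang-dir data/lang_phone`, the generated lexicon FST can be reloaded and labelled for inspection; a minimal sketch mirroring the debug branch above, but using lang_dir throughout:

# Hedged sketch: reload L.pt and attach the token/word tables written by the script above.
import k2
import torch

lang_dir = "data/lang_phone"  # assumption: the --lang-dir passed to the script
L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L.pt"))
L.labels_sym = k2.SymbolTable.from_file(f"{lang_dir}/tokens.txt")
L.aux_labels_sym = k2.SymbolTable.from_file(f"{lang_dir}/words.txt")
print(L.shape, L.num_arcs)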

@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Mingshuang Luo)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script takes as input the supervisions directory "data/manifests"
(containing supervisions_TRAIN.json) and does the following:
1. Generate lexicon.txt.
"""
import argparse
import json
import logging
from pathlib import Path
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--manifests-dir",
type=str,
help="""Input directory.
""",
)
parser.add_argument(
"--lang-dir",
type=str,
help="""Output directory.
""",
)
return parser.parse_args()
def prepare_lexicon(manifests_dir: str, lang_dir: str):
"""
Args:
manifests_dir:
The manifests directory, e.g., data/manifests.
lang_dir:
The language directory, e.g., data/lang_phone.
Return:
The path to the generated lexicon.txt in lang_dir.
"""
phones = []
supervisions_train = Path(manifests_dir) / "supervisions_TRAIN.json"
lexicon = Path(lang_dir) / "lexicon.txt"
logging.info(f"Loading {supervisions_train}!")
with open(supervisions_train, 'r') as load_f:
load_dicts = json.load(load_f)
for load_dict in load_dicts:
idx = load_dict['id']
text = load_dict['text']
phones_list = list(filter(None, text.split(' ')))
for phone in phones_list:
if phone not in phones:
phones.append(phone)
with open(lexicon, 'w') as f:
for phone in sorted(phones):
f.write(str(phone) + " " + str(phone))
f.write('\n')
f.write("<UNK> <UNK>")
f.write('\n')
return lexicon
def main():
args = get_args()
manifests_dir = Path(args.manifests_dir)
lang_dir = Path(args.lang_dir)
logging.info(f"Generating lexicon.txt and train.text")
lexicon_file = prepare_lexicon(manifests_dir, lang_dir)
if __name__ == "__main__":
formatter = (
"%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
)
logging.basicConfig(format=formatter, level=logging.INFO)
main()
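Because every phone maps to itself, the lexicon.txt written above is just one "phone phone" pair per line plus an "<UNK> <UNK>" entry. A tiny sketch of that layout, using an illustrative subset of phones rather than the full 39-phone inventory:

# Toy illustration of the lexicon.txt layout written by prepare_lexicon();
# the phone list is an illustrative subset, not the full 39-phone set.
phones = ["aa", "ae", "sil"]
lines = [f"{p} {p}" for p in sorted(phones)] + ["<UNK> <UNK>"]
print("\n".join(lines))
# aa aa
# ae ae
# sil sil
# <UNK> <UNK>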

151
egs/timit/ASR/prepare.sh Normal file
View File

@ -0,0 +1,151 @@
#!/usr/bin/env bash
set -eou pipefail
num_phones=39
# Here we use num_phones=39 for modeling
nj=15
stage=-1
stop_stage=100
# We assume dl_dir (download dir) contains the following
# directories and files. If not, they will be downloaded
# by this script automatically.
#
# - $dl_dir/timit
# You can find data, train_data.csv, test_data.csv, etc., inside it.
# You can download them from https://data.deepai.org/timit.zip
#
# - $dl_dir/lm
# This directory contains the language model(LM) downloaded from
# https://huggingface.co/luomingshuang/timit_lm, and the LM is based
# on 39 phones.
#
# - lm_tgmed.arpa
#
# - $dl_dir/musan
# This directory contains the following directories downloaded from
# http://www.openslr.org/17/
#
# - music
# - noise
# - speech
dl_dir=$PWD/download
splits_dir=$PWD/splits_dir
. shared/parse_options.sh || exit 1
# All files generated by this script are saved in "data".
# You can safely remove "data" and rerun this script to regenerate it.
mkdir -p data
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
log "dl_dir: $dl_dir"
if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
log "Stage -1: Download LM"
# We assume that you have installed git-lfs; if not, you can install it
# using: `sudo apt-get install git-lfs && git-lfs install`
[ ! -e $dl_dir/lm ] && mkdir -p $dl_dir/lm
git clone https://huggingface.co/luomingshuang/timit_lm $dl_dir/lm
fi
if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
log "Stage 0: Download data"
# If you have pre-downloaded it to /path/to/timit,
# you can create a symlink
#
# ln -sfv /path/to/timit $dl_dir/timit
#
if [ ! -d $dl_dir/timit ]; then
lhotse download timit $dl_dir
fi
# If you have pre-downloaded it to /path/to/musan,
# you can create a symlink
#
# ln -sfv /path/to/musan $dl_dir/
#
if [ ! -d $dl_dir/musan ]; then
lhotse download musan $dl_dir
fi
fi
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
log "Stage 1: Prepare timit manifest"
# We assume that you have downloaded the timit corpus
# to $dl_dir/timit
mkdir -p data/manifests
lhotse prepare timit -p $num_phones -j $nj $dl_dir/timit/data $splits_dir data/manifests
fi
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Prepare musan manifest"
# We assume that you have downloaded the musan corpus
# to $dl_dir/musan
mkdir -p data/manifests
lhotse prepare musan $dl_dir/musan data/manifests
fi
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Compute fbank for librispeech"
mkdir -p data/fbank
./local/compute_fbank_timit.py
fi
if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
log "Stage 4: Compute fbank for musan"
mkdir -p data/fbank
./local/compute_fbank_musan.py
fi
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Prepare phone based lang"
lang_dir=data/lang_phone
mkdir -p $lang_dir
./local/prepare_lexicon.py \
--manifests-dir data/manifests \
--lang-dir $lang_dir
if [ ! -f $lang_dir/L_disambig.pt ]; then
./local/prepare_lang.py --lang-dir $lang_dir
fi
fi
if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Prepare G"
# We assume you have installed kaldilm; if not, please install
# it using: pip install kaldilm
mkdir -p data/lm
if [ ! -f data/lm/G_3_gram.fst.txt ]; then
# It is used in building HLG
python3 -m kaldilm \
--read-symbol-table="data/lang_phone/words.txt" \
--disambig-symbol='#0' \
--max-order=3 \
$dl_dir/lm/lm_tgmed.arpa > data/lm/G_3_gram.fst.txt
fi
if [ ! -f data/lm/G_4_gram.fst.txt ]; then
# It is used for LM rescoring
python3 -m kaldilm \
--read-symbol-table="data/lang_phone/words.txt" \
--disambig-symbol='#0' \
--max-order=4 \
$dl_dir/lm/lm_tgmed.arpa > data/lm/G_4_gram.fst.txt
fi
fi
if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
log "Stage 7: Compile HLG"
./local/compile_hlg.py --lang-dir data/lang_phone
fi

View File

@ -0,0 +1,97 @@
#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
# Arnab Ghoshal, Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).
###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###
# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
if [ "${!argpos}" == "--config" ]; then
argpos_plus1=$((argpos+1))
config=${!argpos_plus1}
[ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
. $config # source the config file.
fi
done
###
### Now we process the command line options
###
while true; do
[ -z "${1:-}" ] && break; # break if there are no arguments
case "$1" in
# If the enclosing script is called with --help option, print the help
# message and exit. Scripts should put help messages in $help_message
--help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
else printf "$help_message\n" 1>&2 ; fi;
exit 0 ;;
--*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
exit 1 ;;
# If the first command-line argument begins with "--" (e.g. --foo-bar),
# then work out the variable name as $name, which will equal "foo_bar".
--*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
# Next we test whether the variable in question is undefined -- if so it's
# an invalid option and we die. Note: $0 evaluates to the name of the
# enclosing script.
# The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
# is undefined. We then have to wrap this test inside "eval" because
# foo_bar is itself inside a variable ($name).
eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
oldval="`eval echo \\$$name`";
# Work out whether we seem to be expecting a Boolean argument.
if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
was_bool=true;
else
was_bool=false;
fi
# Set the variable to the right value-- the escaped quotes make it work if
# the option had spaces, like --cmd "queue.pl -sync y"
eval $name=\"$2\";
# Check that Boolean-valued arguments are really Boolean.
if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
exit 1;
fi
shift 2;
;;
*) break;
esac
done
# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
true; # so this script returns exit code 0.

View File

@ -0,0 +1,400 @@
FADG0_SI1279 TEST/DR4/FADG0/SI1279.WAV
FADG0_SI1909 TEST/DR4/FADG0/SI1909.WAV
FADG0_SI649 TEST/DR4/FADG0/SI649.WAV
FADG0_SX109 TEST/DR4/FADG0/SX109.WAV
FADG0_SX19 TEST/DR4/FADG0/SX19.WAV
FADG0_SX199 TEST/DR4/FADG0/SX199.WAV
FADG0_SX289 TEST/DR4/FADG0/SX289.WAV
FADG0_SX379 TEST/DR4/FADG0/SX379.WAV
FAKS0_SI1573 TEST/DR1/FAKS0/SI1573.WAV
FAKS0_SI2203 TEST/DR1/FAKS0/SI2203.WAV
FAKS0_SI943 TEST/DR1/FAKS0/SI943.WAV
FAKS0_SX133 TEST/DR1/FAKS0/SX133.WAV
FAKS0_SX223 TEST/DR1/FAKS0/SX223.WAV
FAKS0_SX313 TEST/DR1/FAKS0/SX313.WAV
FAKS0_SX403 TEST/DR1/FAKS0/SX403.WAV
FAKS0_SX43 TEST/DR1/FAKS0/SX43.WAV
FCAL1_SI1403 TEST/DR5/FCAL1/SI1403.WAV
FCAL1_SI2033 TEST/DR5/FCAL1/SI2033.WAV
FCAL1_SI773 TEST/DR5/FCAL1/SI773.WAV
FCAL1_SX143 TEST/DR5/FCAL1/SX143.WAV
FCAL1_SX233 TEST/DR5/FCAL1/SX233.WAV
FCAL1_SX323 TEST/DR5/FCAL1/SX323.WAV
FCAL1_SX413 TEST/DR5/FCAL1/SX413.WAV
FCAL1_SX53 TEST/DR5/FCAL1/SX53.WAV
FCMH0_SI1454 TEST/DR3/FCMH0/SI1454.WAV
FCMH0_SI2084 TEST/DR3/FCMH0/SI2084.WAV
FCMH0_SI824 TEST/DR3/FCMH0/SI824.WAV
FCMH0_SX104 TEST/DR3/FCMH0/SX104.WAV
FCMH0_SX14 TEST/DR3/FCMH0/SX14.WAV
FCMH0_SX194 TEST/DR3/FCMH0/SX194.WAV
FCMH0_SX284 TEST/DR3/FCMH0/SX284.WAV
FCMH0_SX374 TEST/DR3/FCMH0/SX374.WAV
FDAC1_SI1474 TEST/DR1/FDAC1/SI1474.WAV
FDAC1_SI2104 TEST/DR1/FDAC1/SI2104.WAV
FDAC1_SI844 TEST/DR1/FDAC1/SI844.WAV
FDAC1_SX124 TEST/DR1/FDAC1/SX124.WAV
FDAC1_SX214 TEST/DR1/FDAC1/SX214.WAV
FDAC1_SX304 TEST/DR1/FDAC1/SX304.WAV
FDAC1_SX34 TEST/DR1/FDAC1/SX34.WAV
FDAC1_SX394 TEST/DR1/FDAC1/SX394.WAV
FDMS0_SI1218 TEST/DR4/FDMS0/SI1218.WAV
FDMS0_SI1502 TEST/DR4/FDMS0/SI1502.WAV
FDMS0_SI1848 TEST/DR4/FDMS0/SI1848.WAV
FDMS0_SX138 TEST/DR4/FDMS0/SX138.WAV
FDMS0_SX228 TEST/DR4/FDMS0/SX228.WAV
FDMS0_SX318 TEST/DR4/FDMS0/SX318.WAV
FDMS0_SX408 TEST/DR4/FDMS0/SX408.WAV
FDMS0_SX48 TEST/DR4/FDMS0/SX48.WAV
FDRW0_SI1283 TEST/DR6/FDRW0/SI1283.WAV
FDRW0_SI1423 TEST/DR6/FDRW0/SI1423.WAV
FDRW0_SI653 TEST/DR6/FDRW0/SI653.WAV
FDRW0_SX113 TEST/DR6/FDRW0/SX113.WAV
FDRW0_SX203 TEST/DR6/FDRW0/SX203.WAV
FDRW0_SX23 TEST/DR6/FDRW0/SX23.WAV
FDRW0_SX293 TEST/DR6/FDRW0/SX293.WAV
FDRW0_SX383 TEST/DR6/FDRW0/SX383.WAV
FEDW0_SI1084 TEST/DR4/FEDW0/SI1084.WAV
FEDW0_SI1653 TEST/DR4/FEDW0/SI1653.WAV
FEDW0_SI1714 TEST/DR4/FEDW0/SI1714.WAV
FEDW0_SX184 TEST/DR4/FEDW0/SX184.WAV
FEDW0_SX274 TEST/DR4/FEDW0/SX274.WAV
FEDW0_SX364 TEST/DR4/FEDW0/SX364.WAV
FEDW0_SX4 TEST/DR4/FEDW0/SX4.WAV
FEDW0_SX94 TEST/DR4/FEDW0/SX94.WAV
FGJD0_SI1179 TEST/DR4/FGJD0/SI1179.WAV
FGJD0_SI549 TEST/DR4/FGJD0/SI549.WAV
FGJD0_SI818 TEST/DR4/FGJD0/SI818.WAV
FGJD0_SX189 TEST/DR4/FGJD0/SX189.WAV
FGJD0_SX279 TEST/DR4/FGJD0/SX279.WAV
FGJD0_SX369 TEST/DR4/FGJD0/SX369.WAV
FGJD0_SX9 TEST/DR4/FGJD0/SX9.WAV
FGJD0_SX99 TEST/DR4/FGJD0/SX99.WAV
FJEM0_SI1264 TEST/DR1/FJEM0/SI1264.WAV
FJEM0_SI1894 TEST/DR1/FJEM0/SI1894.WAV
FJEM0_SI634 TEST/DR1/FJEM0/SI634.WAV
FJEM0_SX184 TEST/DR1/FJEM0/SX184.WAV
FJEM0_SX274 TEST/DR1/FJEM0/SX274.WAV
FJEM0_SX364 TEST/DR1/FJEM0/SX364.WAV
FJEM0_SX4 TEST/DR1/FJEM0/SX4.WAV
FJEM0_SX94 TEST/DR1/FJEM0/SX94.WAV
FJMG0_SI1181 TEST/DR4/FJMG0/SI1181.WAV
FJMG0_SI1811 TEST/DR4/FJMG0/SI1811.WAV
FJMG0_SI551 TEST/DR4/FJMG0/SI551.WAV
FJMG0_SX101 TEST/DR4/FJMG0/SX101.WAV
FJMG0_SX11 TEST/DR4/FJMG0/SX11.WAV
FJMG0_SX191 TEST/DR4/FJMG0/SX191.WAV
FJMG0_SX281 TEST/DR4/FJMG0/SX281.WAV
FJMG0_SX371 TEST/DR4/FJMG0/SX371.WAV
FJSJ0_SI1484 TEST/DR8/FJSJ0/SI1484.WAV
FJSJ0_SI2114 TEST/DR8/FJSJ0/SI2114.WAV
FJSJ0_SI854 TEST/DR8/FJSJ0/SI854.WAV
FJSJ0_SX134 TEST/DR8/FJSJ0/SX134.WAV
FJSJ0_SX224 TEST/DR8/FJSJ0/SX224.WAV
FJSJ0_SX314 TEST/DR8/FJSJ0/SX314.WAV
FJSJ0_SX404 TEST/DR8/FJSJ0/SX404.WAV
FJSJ0_SX44 TEST/DR8/FJSJ0/SX44.WAV
FKMS0_SI1490 TEST/DR3/FKMS0/SI1490.WAV
FKMS0_SI2120 TEST/DR3/FKMS0/SI2120.WAV
FKMS0_SI860 TEST/DR3/FKMS0/SI860.WAV
FKMS0_SX140 TEST/DR3/FKMS0/SX140.WAV
FKMS0_SX230 TEST/DR3/FKMS0/SX230.WAV
FKMS0_SX320 TEST/DR3/FKMS0/SX320.WAV
FKMS0_SX410 TEST/DR3/FKMS0/SX410.WAV
FKMS0_SX50 TEST/DR3/FKMS0/SX50.WAV
FMAH0_SI1289 TEST/DR5/FMAH0/SI1289.WAV
FMAH0_SI1919 TEST/DR5/FMAH0/SI1919.WAV
FMAH0_SI659 TEST/DR5/FMAH0/SI659.WAV
FMAH0_SX119 TEST/DR5/FMAH0/SX119.WAV
FMAH0_SX209 TEST/DR5/FMAH0/SX209.WAV
FMAH0_SX29 TEST/DR5/FMAH0/SX29.WAV
FMAH0_SX299 TEST/DR5/FMAH0/SX299.WAV
FMAH0_SX389 TEST/DR5/FMAH0/SX389.WAV
FMML0_SI1040 TEST/DR7/FMML0/SI1040.WAV
FMML0_SI1670 TEST/DR7/FMML0/SI1670.WAV
FMML0_SI2300 TEST/DR7/FMML0/SI2300.WAV
FMML0_SX140 TEST/DR7/FMML0/SX140.WAV
FMML0_SX230 TEST/DR7/FMML0/SX230.WAV
FMML0_SX320 TEST/DR7/FMML0/SX320.WAV
FMML0_SX410 TEST/DR7/FMML0/SX410.WAV
FMML0_SX50 TEST/DR7/FMML0/SX50.WAV
FNMR0_SI1399 TEST/DR4/FNMR0/SI1399.WAV
FNMR0_SI2029 TEST/DR4/FNMR0/SI2029.WAV
FNMR0_SI769 TEST/DR4/FNMR0/SI769.WAV
FNMR0_SX139 TEST/DR4/FNMR0/SX139.WAV
FNMR0_SX229 TEST/DR4/FNMR0/SX229.WAV
FNMR0_SX319 TEST/DR4/FNMR0/SX319.WAV
FNMR0_SX409 TEST/DR4/FNMR0/SX409.WAV
FNMR0_SX49 TEST/DR4/FNMR0/SX49.WAV
FREW0_SI1030 TEST/DR4/FREW0/SI1030.WAV
FREW0_SI1280 TEST/DR4/FREW0/SI1280.WAV
FREW0_SI1910 TEST/DR4/FREW0/SI1910.WAV
FREW0_SX110 TEST/DR4/FREW0/SX110.WAV
FREW0_SX20 TEST/DR4/FREW0/SX20.WAV
FREW0_SX200 TEST/DR4/FREW0/SX200.WAV
FREW0_SX290 TEST/DR4/FREW0/SX290.WAV
FREW0_SX380 TEST/DR4/FREW0/SX380.WAV
FSEM0_SI1198 TEST/DR4/FSEM0/SI1198.WAV
FSEM0_SI1828 TEST/DR4/FSEM0/SI1828.WAV
FSEM0_SI568 TEST/DR4/FSEM0/SI568.WAV
FSEM0_SX118 TEST/DR4/FSEM0/SX118.WAV
FSEM0_SX208 TEST/DR4/FSEM0/SX208.WAV
FSEM0_SX28 TEST/DR4/FSEM0/SX28.WAV
FSEM0_SX298 TEST/DR4/FSEM0/SX298.WAV
FSEM0_SX388 TEST/DR4/FSEM0/SX388.WAV
MAJC0_SI1946 TEST/DR8/MAJC0/SI1946.WAV
MAJC0_SI2095 TEST/DR8/MAJC0/SI2095.WAV
MAJC0_SI835 TEST/DR8/MAJC0/SI835.WAV
MAJC0_SX115 TEST/DR8/MAJC0/SX115.WAV
MAJC0_SX205 TEST/DR8/MAJC0/SX205.WAV
MAJC0_SX25 TEST/DR8/MAJC0/SX25.WAV
MAJC0_SX295 TEST/DR8/MAJC0/SX295.WAV
MAJC0_SX385 TEST/DR8/MAJC0/SX385.WAV
MBDG0_SI1463 TEST/DR3/MBDG0/SI1463.WAV
MBDG0_SI2093 TEST/DR3/MBDG0/SI2093.WAV
MBDG0_SI833 TEST/DR3/MBDG0/SI833.WAV
MBDG0_SX113 TEST/DR3/MBDG0/SX113.WAV
MBDG0_SX203 TEST/DR3/MBDG0/SX203.WAV
MBDG0_SX23 TEST/DR3/MBDG0/SX23.WAV
MBDG0_SX293 TEST/DR3/MBDG0/SX293.WAV
MBDG0_SX383 TEST/DR3/MBDG0/SX383.WAV
MBNS0_SI1220 TEST/DR4/MBNS0/SI1220.WAV
MBNS0_SI1850 TEST/DR4/MBNS0/SI1850.WAV
MBNS0_SI590 TEST/DR4/MBNS0/SI590.WAV
MBNS0_SX140 TEST/DR4/MBNS0/SX140.WAV
MBNS0_SX230 TEST/DR4/MBNS0/SX230.WAV
MBNS0_SX320 TEST/DR4/MBNS0/SX320.WAV
MBNS0_SX410 TEST/DR4/MBNS0/SX410.WAV
MBNS0_SX50 TEST/DR4/MBNS0/SX50.WAV
MBWM0_SI1304 TEST/DR3/MBWM0/SI1304.WAV
MBWM0_SI1934 TEST/DR3/MBWM0/SI1934.WAV
MBWM0_SI674 TEST/DR3/MBWM0/SI674.WAV
MBWM0_SX134 TEST/DR3/MBWM0/SX134.WAV
MBWM0_SX224 TEST/DR3/MBWM0/SX224.WAV
MBWM0_SX314 TEST/DR3/MBWM0/SX314.WAV
MBWM0_SX404 TEST/DR3/MBWM0/SX404.WAV
MBWM0_SX44 TEST/DR3/MBWM0/SX44.WAV
MCSH0_SI1549 TEST/DR3/MCSH0/SI1549.WAV
MCSH0_SI2179 TEST/DR3/MCSH0/SI2179.WAV
MCSH0_SI919 TEST/DR3/MCSH0/SI919.WAV
MCSH0_SX109 TEST/DR3/MCSH0/SX109.WAV
MCSH0_SX19 TEST/DR3/MCSH0/SX19.WAV
MCSH0_SX199 TEST/DR3/MCSH0/SX199.WAV
MCSH0_SX289 TEST/DR3/MCSH0/SX289.WAV
MCSH0_SX379 TEST/DR3/MCSH0/SX379.WAV
MDLF0_SI1583 TEST/DR7/MDLF0/SI1583.WAV
MDLF0_SI2213 TEST/DR7/MDLF0/SI2213.WAV
MDLF0_SI953 TEST/DR7/MDLF0/SI953.WAV
MDLF0_SX143 TEST/DR7/MDLF0/SX143.WAV
MDLF0_SX233 TEST/DR7/MDLF0/SX233.WAV
MDLF0_SX323 TEST/DR7/MDLF0/SX323.WAV
MDLF0_SX413 TEST/DR7/MDLF0/SX413.WAV
MDLF0_SX53 TEST/DR7/MDLF0/SX53.WAV
MDLS0_SI1628 TEST/DR4/MDLS0/SI1628.WAV
MDLS0_SI2258 TEST/DR4/MDLS0/SI2258.WAV
MDLS0_SI998 TEST/DR4/MDLS0/SI998.WAV
MDLS0_SX188 TEST/DR4/MDLS0/SX188.WAV
MDLS0_SX278 TEST/DR4/MDLS0/SX278.WAV
MDLS0_SX368 TEST/DR4/MDLS0/SX368.WAV
MDLS0_SX8 TEST/DR4/MDLS0/SX8.WAV
MDLS0_SX98 TEST/DR4/MDLS0/SX98.WAV
MDVC0_SI2174 TEST/DR7/MDVC0/SI2174.WAV
MDVC0_SI2196 TEST/DR7/MDVC0/SI2196.WAV
MDVC0_SI936 TEST/DR7/MDVC0/SI936.WAV
MDVC0_SX126 TEST/DR7/MDVC0/SX126.WAV
MDVC0_SX216 TEST/DR7/MDVC0/SX216.WAV
MDVC0_SX306 TEST/DR7/MDVC0/SX306.WAV
MDVC0_SX36 TEST/DR7/MDVC0/SX36.WAV
MDVC0_SX396 TEST/DR7/MDVC0/SX396.WAV
MERS0_SI1019 TEST/DR7/MERS0/SI1019.WAV
MERS0_SI1649 TEST/DR7/MERS0/SI1649.WAV
MERS0_SI497 TEST/DR7/MERS0/SI497.WAV
MERS0_SX119 TEST/DR7/MERS0/SX119.WAV
MERS0_SX209 TEST/DR7/MERS0/SX209.WAV
MERS0_SX29 TEST/DR7/MERS0/SX29.WAV
MERS0_SX299 TEST/DR7/MERS0/SX299.WAV
MERS0_SX389 TEST/DR7/MERS0/SX389.WAV
MGJF0_SI1901 TEST/DR3/MGJF0/SI1901.WAV
MGJF0_SI641 TEST/DR3/MGJF0/SI641.WAV
MGJF0_SI776 TEST/DR3/MGJF0/SI776.WAV
MGJF0_SX101 TEST/DR3/MGJF0/SX101.WAV
MGJF0_SX11 TEST/DR3/MGJF0/SX11.WAV
MGJF0_SX191 TEST/DR3/MGJF0/SX191.WAV
MGJF0_SX281 TEST/DR3/MGJF0/SX281.WAV
MGJF0_SX371 TEST/DR3/MGJF0/SX371.WAV
MGLB0_SI1534 TEST/DR3/MGLB0/SI1534.WAV
MGLB0_SI2164 TEST/DR3/MGLB0/SI2164.WAV
MGLB0_SI904 TEST/DR3/MGLB0/SI904.WAV
MGLB0_SX184 TEST/DR3/MGLB0/SX184.WAV
MGLB0_SX274 TEST/DR3/MGLB0/SX274.WAV
MGLB0_SX364 TEST/DR3/MGLB0/SX364.WAV
MGLB0_SX4 TEST/DR3/MGLB0/SX4.WAV
MGLB0_SX94 TEST/DR3/MGLB0/SX94.WAV
MGWT0_SI1539 TEST/DR2/MGWT0/SI1539.WAV
MGWT0_SI2169 TEST/DR2/MGWT0/SI2169.WAV
MGWT0_SI909 TEST/DR2/MGWT0/SI909.WAV
MGWT0_SX189 TEST/DR2/MGWT0/SX189.WAV
MGWT0_SX279 TEST/DR2/MGWT0/SX279.WAV
MGWT0_SX369 TEST/DR2/MGWT0/SX369.WAV
MGWT0_SX9 TEST/DR2/MGWT0/SX9.WAV
MGWT0_SX99 TEST/DR2/MGWT0/SX99.WAV
MJAR0_SI1988 TEST/DR2/MJAR0/SI1988.WAV
MJAR0_SI2247 TEST/DR2/MJAR0/SI2247.WAV
MJAR0_SI728 TEST/DR2/MJAR0/SI728.WAV
MJAR0_SX188 TEST/DR2/MJAR0/SX188.WAV
MJAR0_SX278 TEST/DR2/MJAR0/SX278.WAV
MJAR0_SX368 TEST/DR2/MJAR0/SX368.WAV
MJAR0_SX8 TEST/DR2/MJAR0/SX8.WAV
MJAR0_SX98 TEST/DR2/MJAR0/SX98.WAV
MJFC0_SI1033 TEST/DR6/MJFC0/SI1033.WAV
MJFC0_SI1663 TEST/DR6/MJFC0/SI1663.WAV
MJFC0_SI2293 TEST/DR6/MJFC0/SI2293.WAV
MJFC0_SX133 TEST/DR6/MJFC0/SX133.WAV
MJFC0_SX223 TEST/DR6/MJFC0/SX223.WAV
MJFC0_SX313 TEST/DR6/MJFC0/SX313.WAV
MJFC0_SX403 TEST/DR6/MJFC0/SX403.WAV
MJFC0_SX43 TEST/DR6/MJFC0/SX43.WAV
MJSW0_SI1010 TEST/DR1/MJSW0/SI1010.WAV
MJSW0_SI1640 TEST/DR1/MJSW0/SI1640.WAV
MJSW0_SI2270 TEST/DR1/MJSW0/SI2270.WAV
MJSW0_SX110 TEST/DR1/MJSW0/SX110.WAV
MJSW0_SX20 TEST/DR1/MJSW0/SX20.WAV
MJSW0_SX200 TEST/DR1/MJSW0/SX200.WAV
MJSW0_SX290 TEST/DR1/MJSW0/SX290.WAV
MJSW0_SX380 TEST/DR1/MJSW0/SX380.WAV
MMDB1_SI1625 TEST/DR2/MMDB1/SI1625.WAV
MMDB1_SI2255 TEST/DR2/MMDB1/SI2255.WAV
MMDB1_SI995 TEST/DR2/MMDB1/SI995.WAV
MMDB1_SX185 TEST/DR2/MMDB1/SX185.WAV
MMDB1_SX275 TEST/DR2/MMDB1/SX275.WAV
MMDB1_SX365 TEST/DR2/MMDB1/SX365.WAV
MMDB1_SX5 TEST/DR2/MMDB1/SX5.WAV
MMDB1_SX95 TEST/DR2/MMDB1/SX95.WAV
MMDM2_SI1452 TEST/DR2/MMDM2/SI1452.WAV
MMDM2_SI1555 TEST/DR2/MMDM2/SI1555.WAV
MMDM2_SI2082 TEST/DR2/MMDM2/SI2082.WAV
MMDM2_SX102 TEST/DR2/MMDM2/SX102.WAV
MMDM2_SX12 TEST/DR2/MMDM2/SX12.WAV
MMDM2_SX192 TEST/DR2/MMDM2/SX192.WAV
MMDM2_SX282 TEST/DR2/MMDM2/SX282.WAV
MMDM2_SX372 TEST/DR2/MMDM2/SX372.WAV
MMJR0_SI1648 TEST/DR3/MMJR0/SI1648.WAV
MMJR0_SI2166 TEST/DR3/MMJR0/SI2166.WAV
MMJR0_SI2278 TEST/DR3/MMJR0/SI2278.WAV
MMJR0_SX118 TEST/DR3/MMJR0/SX118.WAV
MMJR0_SX208 TEST/DR3/MMJR0/SX208.WAV
MMJR0_SX28 TEST/DR3/MMJR0/SX28.WAV
MMJR0_SX298 TEST/DR3/MMJR0/SX298.WAV
MMJR0_SX388 TEST/DR3/MMJR0/SX388.WAV
MMWH0_SI1089 TEST/DR3/MMWH0/SI1089.WAV
MMWH0_SI1301 TEST/DR3/MMWH0/SI1301.WAV
MMWH0_SI459 TEST/DR3/MMWH0/SI459.WAV
MMWH0_SX189 TEST/DR3/MMWH0/SX189.WAV
MMWH0_SX279 TEST/DR3/MMWH0/SX279.WAV
MMWH0_SX369 TEST/DR3/MMWH0/SX369.WAV
MMWH0_SX9 TEST/DR3/MMWH0/SX9.WAV
MMWH0_SX99 TEST/DR3/MMWH0/SX99.WAV
MPDF0_SI1542 TEST/DR2/MPDF0/SI1542.WAV
MPDF0_SI2172 TEST/DR2/MPDF0/SI2172.WAV
MPDF0_SI912 TEST/DR2/MPDF0/SI912.WAV
MPDF0_SX102 TEST/DR2/MPDF0/SX102.WAV
MPDF0_SX12 TEST/DR2/MPDF0/SX12.WAV
MPDF0_SX192 TEST/DR2/MPDF0/SX192.WAV
MPDF0_SX282 TEST/DR2/MPDF0/SX282.WAV
MPDF0_SX372 TEST/DR2/MPDF0/SX372.WAV
MRCS0_SI1223 TEST/DR7/MRCS0/SI1223.WAV
MRCS0_SI1853 TEST/DR7/MRCS0/SI1853.WAV
MRCS0_SI593 TEST/DR7/MRCS0/SI593.WAV
MRCS0_SX143 TEST/DR7/MRCS0/SX143.WAV
MRCS0_SX233 TEST/DR7/MRCS0/SX233.WAV
MRCS0_SX323 TEST/DR7/MRCS0/SX323.WAV
MRCS0_SX413 TEST/DR7/MRCS0/SX413.WAV
MRCS0_SX53 TEST/DR7/MRCS0/SX53.WAV
MREB0_SI1375 TEST/DR1/MREB0/SI1375.WAV
MREB0_SI2005 TEST/DR1/MREB0/SI2005.WAV
MREB0_SI745 TEST/DR1/MREB0/SI745.WAV
MREB0_SX115 TEST/DR1/MREB0/SX115.WAV
MREB0_SX205 TEST/DR1/MREB0/SX205.WAV
MREB0_SX25 TEST/DR1/MREB0/SX25.WAV
MREB0_SX295 TEST/DR1/MREB0/SX295.WAV
MREB0_SX385 TEST/DR1/MREB0/SX385.WAV
MRJM4_SI1489 TEST/DR7/MRJM4/SI1489.WAV
MRJM4_SI2119 TEST/DR7/MRJM4/SI2119.WAV
MRJM4_SI859 TEST/DR7/MRJM4/SI859.WAV
MRJM4_SX139 TEST/DR7/MRJM4/SX139.WAV
MRJM4_SX229 TEST/DR7/MRJM4/SX229.WAV
MRJM4_SX319 TEST/DR7/MRJM4/SX319.WAV
MRJM4_SX409 TEST/DR7/MRJM4/SX409.WAV
MRJM4_SX49 TEST/DR7/MRJM4/SX49.WAV
MRJR0_SI1182 TEST/DR6/MRJR0/SI1182.WAV
MRJR0_SI1812 TEST/DR6/MRJR0/SI1812.WAV
MRJR0_SI2313 TEST/DR6/MRJR0/SI2313.WAV
MRJR0_SX102 TEST/DR6/MRJR0/SX102.WAV
MRJR0_SX12 TEST/DR6/MRJR0/SX12.WAV
MRJR0_SX192 TEST/DR6/MRJR0/SX192.WAV
MRJR0_SX282 TEST/DR6/MRJR0/SX282.WAV
MRJR0_SX372 TEST/DR6/MRJR0/SX372.WAV
MROA0_SI1307 TEST/DR4/MROA0/SI1307.WAV
MROA0_SI1970 TEST/DR4/MROA0/SI1970.WAV
MROA0_SI677 TEST/DR4/MROA0/SI677.WAV
MROA0_SX137 TEST/DR4/MROA0/SX137.WAV
MROA0_SX227 TEST/DR4/MROA0/SX227.WAV
MROA0_SX317 TEST/DR4/MROA0/SX317.WAV
MROA0_SX407 TEST/DR4/MROA0/SX407.WAV
MROA0_SX47 TEST/DR4/MROA0/SX47.WAV
MRTK0_SI1093 TEST/DR3/MRTK0/SI1093.WAV
MRTK0_SI1723 TEST/DR3/MRTK0/SI1723.WAV
MRTK0_SI1750 TEST/DR3/MRTK0/SI1750.WAV
MRTK0_SX103 TEST/DR3/MRTK0/SX103.WAV
MRTK0_SX13 TEST/DR3/MRTK0/SX13.WAV
MRTK0_SX193 TEST/DR3/MRTK0/SX193.WAV
MRTK0_SX283 TEST/DR3/MRTK0/SX283.WAV
MRTK0_SX373 TEST/DR3/MRTK0/SX373.WAV
MRWS1_SI1130 TEST/DR5/MRWS1/SI1130.WAV
MRWS1_SI1496 TEST/DR5/MRWS1/SI1496.WAV
MRWS1_SI500 TEST/DR5/MRWS1/SI500.WAV
MRWS1_SX140 TEST/DR5/MRWS1/SX140.WAV
MRWS1_SX230 TEST/DR5/MRWS1/SX230.WAV
MRWS1_SX320 TEST/DR5/MRWS1/SX320.WAV
MRWS1_SX410 TEST/DR5/MRWS1/SX410.WAV
MRWS1_SX50 TEST/DR5/MRWS1/SX50.WAV
MTAA0_SI1285 TEST/DR3/MTAA0/SI1285.WAV
MTAA0_SI1915 TEST/DR3/MTAA0/SI1915.WAV
MTAA0_SI596 TEST/DR3/MTAA0/SI596.WAV
MTAA0_SX115 TEST/DR3/MTAA0/SX115.WAV
MTAA0_SX205 TEST/DR3/MTAA0/SX205.WAV
MTAA0_SX25 TEST/DR3/MTAA0/SX25.WAV
MTAA0_SX295 TEST/DR3/MTAA0/SX295.WAV
MTAA0_SX385 TEST/DR3/MTAA0/SX385.WAV
MTDT0_SI1994 TEST/DR3/MTDT0/SI1994.WAV
MTDT0_SI2254 TEST/DR3/MTDT0/SI2254.WAV
MTDT0_SI994 TEST/DR3/MTDT0/SI994.WAV
MTDT0_SX184 TEST/DR3/MTDT0/SX184.WAV
MTDT0_SX274 TEST/DR3/MTDT0/SX274.WAV
MTDT0_SX364 TEST/DR3/MTDT0/SX364.WAV
MTDT0_SX4 TEST/DR3/MTDT0/SX4.WAV
MTDT0_SX94 TEST/DR3/MTDT0/SX94.WAV
MTEB0_SI1133 TEST/DR4/MTEB0/SI1133.WAV
MTEB0_SI2064 TEST/DR4/MTEB0/SI2064.WAV
MTEB0_SI503 TEST/DR4/MTEB0/SI503.WAV
MTEB0_SX143 TEST/DR4/MTEB0/SX143.WAV
MTEB0_SX233 TEST/DR4/MTEB0/SX233.WAV
MTEB0_SX323 TEST/DR4/MTEB0/SX323.WAV
MTEB0_SX413 TEST/DR4/MTEB0/SX413.WAV
MTEB0_SX53 TEST/DR4/MTEB0/SX53.WAV
MTHC0_SI1015 TEST/DR3/MTHC0/SI1015.WAV
MTHC0_SI1645 TEST/DR3/MTHC0/SI1645.WAV
MTHC0_SI2275 TEST/DR3/MTHC0/SI2275.WAV
MTHC0_SX115 TEST/DR3/MTHC0/SX115.WAV
MTHC0_SX205 TEST/DR3/MTHC0/SX205.WAV
MTHC0_SX25 TEST/DR3/MTHC0/SX25.WAV
MTHC0_SX295 TEST/DR3/MTHC0/SX295.WAV
MTHC0_SX385 TEST/DR3/MTHC0/SX385.WAV
MWJG0_SI1124 TEST/DR3/MWJG0/SI1124.WAV
MWJG0_SI1754 TEST/DR3/MWJG0/SI1754.WAV
MWJG0_SI494 TEST/DR3/MWJG0/SI494.WAV
MWJG0_SX134 TEST/DR3/MWJG0/SX134.WAV
MWJG0_SX224 TEST/DR3/MWJG0/SX224.WAV
MWJG0_SX314 TEST/DR3/MWJG0/SX314.WAV
MWJG0_SX404 TEST/DR3/MWJG0/SX404.WAV
MWJG0_SX44 TEST/DR3/MWJG0/SX44.WAV

File diff suppressed because it is too large

View File

@ -0,0 +1,192 @@
FDHC0_SI1559 TEST/DR7/FDHC0/SI1559.WAV
FDHC0_SI2189 TEST/DR7/FDHC0/SI2189.WAV
FDHC0_SI929 TEST/DR7/FDHC0/SI929.WAV
FDHC0_SX119 TEST/DR7/FDHC0/SX119.WAV
FDHC0_SX209 TEST/DR7/FDHC0/SX209.WAV
FDHC0_SX29 TEST/DR7/FDHC0/SX29.WAV
FDHC0_SX299 TEST/DR7/FDHC0/SX299.WAV
FDHC0_SX389 TEST/DR7/FDHC0/SX389.WAV
FELC0_SI1386 TEST/DR1/FELC0/SI1386.WAV
FELC0_SI2016 TEST/DR1/FELC0/SI2016.WAV
FELC0_SI756 TEST/DR1/FELC0/SI756.WAV
FELC0_SX126 TEST/DR1/FELC0/SX126.WAV
FELC0_SX216 TEST/DR1/FELC0/SX216.WAV
FELC0_SX306 TEST/DR1/FELC0/SX306.WAV
FELC0_SX36 TEST/DR1/FELC0/SX36.WAV
FELC0_SX396 TEST/DR1/FELC0/SX396.WAV
FJLM0_SI1043 TEST/DR4/FJLM0/SI1043.WAV
FJLM0_SI1673 TEST/DR4/FJLM0/SI1673.WAV
FJLM0_SI2303 TEST/DR4/FJLM0/SI2303.WAV
FJLM0_SX143 TEST/DR4/FJLM0/SX143.WAV
FJLM0_SX233 TEST/DR4/FJLM0/SX233.WAV
FJLM0_SX323 TEST/DR4/FJLM0/SX323.WAV
FJLM0_SX413 TEST/DR4/FJLM0/SX413.WAV
FJLM0_SX53 TEST/DR4/FJLM0/SX53.WAV
FMGD0_SI1564 TEST/DR6/FMGD0/SI1564.WAV
FMGD0_SI2194 TEST/DR6/FMGD0/SI2194.WAV
FMGD0_SI934 TEST/DR6/FMGD0/SI934.WAV
FMGD0_SX124 TEST/DR6/FMGD0/SX124.WAV
FMGD0_SX214 TEST/DR6/FMGD0/SX214.WAV
FMGD0_SX304 TEST/DR6/FMGD0/SX304.WAV
FMGD0_SX34 TEST/DR6/FMGD0/SX34.WAV
FMGD0_SX394 TEST/DR6/FMGD0/SX394.WAV
FMLD0_SI2185 TEST/DR8/FMLD0/SI2185.WAV
FMLD0_SI822 TEST/DR8/FMLD0/SI822.WAV
FMLD0_SI925 TEST/DR8/FMLD0/SI925.WAV
FMLD0_SX115 TEST/DR8/FMLD0/SX115.WAV
FMLD0_SX205 TEST/DR8/FMLD0/SX205.WAV
FMLD0_SX25 TEST/DR8/FMLD0/SX25.WAV
FMLD0_SX295 TEST/DR8/FMLD0/SX295.WAV
FMLD0_SX385 TEST/DR8/FMLD0/SX385.WAV
FNLP0_SI1308 TEST/DR5/FNLP0/SI1308.WAV
FNLP0_SI1938 TEST/DR5/FNLP0/SI1938.WAV
FNLP0_SI678 TEST/DR5/FNLP0/SI678.WAV
FNLP0_SX138 TEST/DR5/FNLP0/SX138.WAV
FNLP0_SX228 TEST/DR5/FNLP0/SX228.WAV
FNLP0_SX318 TEST/DR5/FNLP0/SX318.WAV
FNLP0_SX408 TEST/DR5/FNLP0/SX408.WAV
FNLP0_SX48 TEST/DR5/FNLP0/SX48.WAV
FPAS0_SI1272 TEST/DR2/FPAS0/SI1272.WAV
FPAS0_SI2204 TEST/DR2/FPAS0/SI2204.WAV
FPAS0_SI944 TEST/DR2/FPAS0/SI944.WAV
FPAS0_SX134 TEST/DR2/FPAS0/SX134.WAV
FPAS0_SX224 TEST/DR2/FPAS0/SX224.WAV
FPAS0_SX314 TEST/DR2/FPAS0/SX314.WAV
FPAS0_SX404 TEST/DR2/FPAS0/SX404.WAV
FPAS0_SX44 TEST/DR2/FPAS0/SX44.WAV
FPKT0_SI1538 TEST/DR3/FPKT0/SI1538.WAV
FPKT0_SI2168 TEST/DR3/FPKT0/SI2168.WAV
FPKT0_SI908 TEST/DR3/FPKT0/SI908.WAV
FPKT0_SX188 TEST/DR3/FPKT0/SX188.WAV
FPKT0_SX278 TEST/DR3/FPKT0/SX278.WAV
FPKT0_SX368 TEST/DR3/FPKT0/SX368.WAV
FPKT0_SX8 TEST/DR3/FPKT0/SX8.WAV
FPKT0_SX98 TEST/DR3/FPKT0/SX98.WAV
MBPM0_SI1577 TEST/DR5/MBPM0/SI1577.WAV
MBPM0_SI1584 TEST/DR5/MBPM0/SI1584.WAV
MBPM0_SI947 TEST/DR5/MBPM0/SI947.WAV
MBPM0_SX137 TEST/DR5/MBPM0/SX137.WAV
MBPM0_SX227 TEST/DR5/MBPM0/SX227.WAV
MBPM0_SX317 TEST/DR5/MBPM0/SX317.WAV
MBPM0_SX407 TEST/DR5/MBPM0/SX407.WAV
MBPM0_SX47 TEST/DR5/MBPM0/SX47.WAV
MCMJ0_SI1094 TEST/DR6/MCMJ0/SI1094.WAV
MCMJ0_SI464 TEST/DR6/MCMJ0/SI464.WAV
MCMJ0_SI602 TEST/DR6/MCMJ0/SI602.WAV
MCMJ0_SX104 TEST/DR6/MCMJ0/SX104.WAV
MCMJ0_SX14 TEST/DR6/MCMJ0/SX14.WAV
MCMJ0_SX194 TEST/DR6/MCMJ0/SX194.WAV
MCMJ0_SX284 TEST/DR6/MCMJ0/SX284.WAV
MCMJ0_SX374 TEST/DR6/MCMJ0/SX374.WAV
MDAB0_SI1039 TEST/DR1/MDAB0/SI1039.WAV
MDAB0_SI1669 TEST/DR1/MDAB0/SI1669.WAV
MDAB0_SI2299 TEST/DR1/MDAB0/SI2299.WAV
MDAB0_SX139 TEST/DR1/MDAB0/SX139.WAV
MDAB0_SX229 TEST/DR1/MDAB0/SX229.WAV
MDAB0_SX319 TEST/DR1/MDAB0/SX319.WAV
MDAB0_SX409 TEST/DR1/MDAB0/SX409.WAV
MDAB0_SX49 TEST/DR1/MDAB0/SX49.WAV
MGRT0_SI1450 TEST/DR7/MGRT0/SI1450.WAV
MGRT0_SI2080 TEST/DR7/MGRT0/SI2080.WAV
MGRT0_SI820 TEST/DR7/MGRT0/SI820.WAV
MGRT0_SX10 TEST/DR7/MGRT0/SX10.WAV
MGRT0_SX100 TEST/DR7/MGRT0/SX100.WAV
MGRT0_SX190 TEST/DR7/MGRT0/SX190.WAV
MGRT0_SX280 TEST/DR7/MGRT0/SX280.WAV
MGRT0_SX370 TEST/DR7/MGRT0/SX370.WAV
MJDH0_SI1354 TEST/DR6/MJDH0/SI1354.WAV
MJDH0_SI1984 TEST/DR6/MJDH0/SI1984.WAV
MJDH0_SI724 TEST/DR6/MJDH0/SI724.WAV
MJDH0_SX184 TEST/DR6/MJDH0/SX184.WAV
MJDH0_SX274 TEST/DR6/MJDH0/SX274.WAV
MJDH0_SX364 TEST/DR6/MJDH0/SX364.WAV
MJDH0_SX4 TEST/DR6/MJDH0/SX4.WAV
MJDH0_SX94 TEST/DR6/MJDH0/SX94.WAV
MJLN0_SI1449 TEST/DR8/MJLN0/SI1449.WAV
MJLN0_SI2079 TEST/DR8/MJLN0/SI2079.WAV
MJLN0_SI819 TEST/DR8/MJLN0/SI819.WAV
MJLN0_SX189 TEST/DR8/MJLN0/SX189.WAV
MJLN0_SX279 TEST/DR8/MJLN0/SX279.WAV
MJLN0_SX369 TEST/DR8/MJLN0/SX369.WAV
MJLN0_SX9 TEST/DR8/MJLN0/SX9.WAV
MJLN0_SX99 TEST/DR8/MJLN0/SX99.WAV
MJMP0_SI1535 TEST/DR3/MJMP0/SI1535.WAV
MJMP0_SI1791 TEST/DR3/MJMP0/SI1791.WAV
MJMP0_SI905 TEST/DR3/MJMP0/SI905.WAV
MJMP0_SX185 TEST/DR3/MJMP0/SX185.WAV
MJMP0_SX275 TEST/DR3/MJMP0/SX275.WAV
MJMP0_SX365 TEST/DR3/MJMP0/SX365.WAV
MJMP0_SX5 TEST/DR3/MJMP0/SX5.WAV
MJMP0_SX95 TEST/DR3/MJMP0/SX95.WAV
MKLT0_SI1213 TEST/DR5/MKLT0/SI1213.WAV
MKLT0_SI1843 TEST/DR5/MKLT0/SI1843.WAV
MKLT0_SI583 TEST/DR5/MKLT0/SI583.WAV
MKLT0_SX133 TEST/DR5/MKLT0/SX133.WAV
MKLT0_SX223 TEST/DR5/MKLT0/SX223.WAV
MKLT0_SX313 TEST/DR5/MKLT0/SX313.WAV
MKLT0_SX403 TEST/DR5/MKLT0/SX403.WAV
MKLT0_SX43 TEST/DR5/MKLT0/SX43.WAV
MLLL0_SI1363 TEST/DR4/MLLL0/SI1363.WAV
MLLL0_SI1993 TEST/DR4/MLLL0/SI1993.WAV
MLLL0_SI733 TEST/DR4/MLLL0/SI733.WAV
MLLL0_SX103 TEST/DR4/MLLL0/SX103.WAV
MLLL0_SX13 TEST/DR4/MLLL0/SX13.WAV
MLLL0_SX193 TEST/DR4/MLLL0/SX193.WAV
MLLL0_SX283 TEST/DR4/MLLL0/SX283.WAV
MLLL0_SX373 TEST/DR4/MLLL0/SX373.WAV
MLNT0_SI1574 TEST/DR3/MLNT0/SI1574.WAV
MLNT0_SI1902 TEST/DR3/MLNT0/SI1902.WAV
MLNT0_SI642 TEST/DR3/MLNT0/SI642.WAV
MLNT0_SX102 TEST/DR3/MLNT0/SX102.WAV
MLNT0_SX12 TEST/DR3/MLNT0/SX12.WAV
MLNT0_SX192 TEST/DR3/MLNT0/SX192.WAV
MLNT0_SX282 TEST/DR3/MLNT0/SX282.WAV
MLNT0_SX372 TEST/DR3/MLNT0/SX372.WAV
MNJM0_SI1580 TEST/DR7/MNJM0/SI1580.WAV
MNJM0_SI2210 TEST/DR7/MNJM0/SI2210.WAV
MNJM0_SI950 TEST/DR7/MNJM0/SI950.WAV
MNJM0_SX140 TEST/DR7/MNJM0/SX140.WAV
MNJM0_SX230 TEST/DR7/MNJM0/SX230.WAV
MNJM0_SX320 TEST/DR7/MNJM0/SX320.WAV
MNJM0_SX410 TEST/DR7/MNJM0/SX410.WAV
MNJM0_SX50 TEST/DR7/MNJM0/SX50.WAV
MPAM0_SI1189 TEST/DR8/MPAM0/SI1189.WAV
MPAM0_SI1819 TEST/DR8/MPAM0/SI1819.WAV
MPAM0_SI1961 TEST/DR8/MPAM0/SI1961.WAV
MPAM0_SX109 TEST/DR8/MPAM0/SX109.WAV
MPAM0_SX19 TEST/DR8/MPAM0/SX19.WAV
MPAM0_SX199 TEST/DR8/MPAM0/SX199.WAV
MPAM0_SX289 TEST/DR8/MPAM0/SX289.WAV
MPAM0_SX379 TEST/DR8/MPAM0/SX379.WAV
MTAS1_SI1473 TEST/DR2/MTAS1/SI1473.WAV
MTAS1_SI2098 TEST/DR2/MTAS1/SI2098.WAV
MTAS1_SI838 TEST/DR2/MTAS1/SI838.WAV
MTAS1_SX118 TEST/DR2/MTAS1/SX118.WAV
MTAS1_SX208 TEST/DR2/MTAS1/SX208.WAV
MTAS1_SX28 TEST/DR2/MTAS1/SX28.WAV
MTAS1_SX298 TEST/DR2/MTAS1/SX298.WAV
MTAS1_SX388 TEST/DR2/MTAS1/SX388.WAV
MTLS0_SI1370 TEST/DR4/MTLS0/SI1370.WAV
MTLS0_SI2000 TEST/DR4/MTLS0/SI2000.WAV
MTLS0_SI740 TEST/DR4/MTLS0/SI740.WAV
MTLS0_SX110 TEST/DR4/MTLS0/SX110.WAV
MTLS0_SX20 TEST/DR4/MTLS0/SX20.WAV
MTLS0_SX200 TEST/DR4/MTLS0/SX200.WAV
MTLS0_SX290 TEST/DR4/MTLS0/SX290.WAV
MTLS0_SX380 TEST/DR4/MTLS0/SX380.WAV
MWBT0_SI1553 TEST/DR1/MWBT0/SI1553.WAV
MWBT0_SI2183 TEST/DR1/MWBT0/SI2183.WAV
MWBT0_SI923 TEST/DR1/MWBT0/SI923.WAV
MWBT0_SX113 TEST/DR1/MWBT0/SX113.WAV
MWBT0_SX203 TEST/DR1/MWBT0/SX203.WAV
MWBT0_SX23 TEST/DR1/MWBT0/SX23.WAV
MWBT0_SX293 TEST/DR1/MWBT0/SX293.WAV
MWBT0_SX383 TEST/DR1/MWBT0/SX383.WAV
MWEW0_SI1361 TEST/DR2/MWEW0/SI1361.WAV
MWEW0_SI1991 TEST/DR2/MWEW0/SI1991.WAV
MWEW0_SI731 TEST/DR2/MWEW0/SI731.WAV
MWEW0_SX101 TEST/DR2/MWEW0/SX101.WAV
MWEW0_SX11 TEST/DR2/MWEW0/SX11.WAV
MWEW0_SX191 TEST/DR2/MWEW0/SX191.WAV
MWEW0_SX281 TEST/DR2/MWEW0/SX281.WAV
MWEW0_SX371 TEST/DR2/MWEW0/SX371.WAV

View File

@ -0,0 +1,336 @@
# Copyright 2021 Piotr Żelasko
# Mingshuang Luo
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from functools import lru_cache
from pathlib import Path
from typing import List, Union
from lhotse import CutSet, Fbank, FbankConfig, load_manifest
from lhotse.dataset import (
BucketingSampler,
CutConcatenate,
CutMix,
K2SpeechRecognitionDataset,
PrecomputedFeatures,
SingleCutSampler,
SpecAugment,
)
from lhotse.dataset.input_strategies import OnTheFlyFeatures
from torch.utils.data import DataLoader
from icefall.dataset.datamodule import DataModule
from icefall.utils import str2bool
class TimitAsrDataModule(DataModule):
"""
DataModule for k2 ASR experiments.
It assumes there is always one train and valid dataloader,
but there can be multiple test dataloaders (e.g. LibriSpeech test-clean
and test-other).
It contains all the common data pipeline modules used in ASR
experiments, e.g.:
- dynamic batch size,
- bucketing samplers,
- cut concatenation,
- augmentation,
- on-the-fly feature extraction
This class should be derived for specific corpora used in ASR tasks.
"""
@classmethod
def add_arguments(cls, parser: argparse.ArgumentParser):
super().add_arguments(parser)
group = parser.add_argument_group(
title="ASR data related options",
description="These options are used for the preparation of "
"PyTorch DataLoaders from Lhotse CutSet's -- they control the "
"effective batch sizes, sampling strategies, applied data "
"augmentations, etc.",
)
group.add_argument(
"--feature-dir",
type=Path,
default=Path("data/fbank"),
help="Path to directory with train/valid/test cuts.",
)
group.add_argument(
"--max-duration",
type=int,
default=200.0,
help="Maximum pooled recordings duration (seconds) in a "
"single batch. You can reduce it if it causes CUDA OOM.",
)
group.add_argument(
"--bucketing-sampler",
type=str2bool,
default=True,
help="When enabled, the batches will come from buckets of "
"similar duration (saves padding frames).",
)
group.add_argument(
"--num-buckets",
type=int,
default=30,
help="The number of buckets for the BucketingSampler"
"(you might want to increase it for larger datasets).",
)
group.add_argument(
"--concatenate-cuts",
type=str2bool,
default=False,
help="When enabled, utterances (cuts) will be concatenated "
"to minimize the amount of padding.",
)
group.add_argument(
"--duration-factor",
type=float,
default=1.0,
help="Determines the maximum duration of a concatenated cut "
"relative to the duration of the longest cut in a batch.",
)
group.add_argument(
"--gap",
type=float,
default=1.0,
help="The amount of padding (in seconds) inserted between "
"concatenated cuts. This padding is filled with noise when "
"noise augmentation is used.",
)
group.add_argument(
"--on-the-fly-feats",
type=str2bool,
default=False,
help="When enabled, use on-the-fly cut mixing and feature "
"extraction. Will drop existing precomputed feature manifests "
"if available.",
)
group.add_argument(
"--shuffle",
type=str2bool,
default=True,
help="When enabled (=default), the examples will be "
"shuffled for each epoch.",
)
group.add_argument(
"--return-cuts",
type=str2bool,
default=True,
help="When enabled, each batch will have the "
"field: batch['supervisions']['cut'] with the cuts that "
"were used to construct it.",
)
group.add_argument(
"--num-workers",
type=int,
default=2,
help="The number of training dataloader workers that "
"collect the batches.",
)
def train_dataloaders(self) -> DataLoader:
logging.info("About to get train cuts")
cuts_train = self.train_cuts()
logging.info("About to get Musan cuts")
cuts_musan = load_manifest(self.args.feature_dir / "cuts_musan.json.gz")
logging.info("About to create train dataset")
transforms = [CutMix(cuts=cuts_musan, prob=0.5, snr=(10, 20))]
if self.args.concatenate_cuts:
logging.info(
f"Using cut concatenation with duration factor "
f"{self.args.duration_factor} and gap {self.args.gap}."
)
# Cut concatenation should be the first transform in the list,
# so that if we e.g. mix noise in, it will fill the gaps between
# different utterances.
transforms = [
CutConcatenate(
duration_factor=self.args.duration_factor, gap=self.args.gap
)
] + transforms
input_transforms = [
SpecAugment(
num_frame_masks=2,
features_mask_size=27,
num_feature_masks=2,
frames_mask_size=100,
)
]
train = K2SpeechRecognitionDataset(
cut_transforms=transforms,
input_transforms=input_transforms,
return_cuts=self.args.return_cuts,
)
if self.args.on_the_fly_feats:
# NOTE: the PerturbSpeed transform should be added only if we
# remove it from data prep stage.
# Add on-the-fly speed perturbation; since originally it would
# have increased epoch size by 3, we will apply prob 2/3 and use
# 3x more epochs.
# Speed perturbation probably should come first before
# concatenation, but in principle the transforms order doesn't have
# to be strict (e.g. could be randomized)
# transforms = [PerturbSpeed(factors=[0.9, 1.1], p=2/3)] + transforms # noqa
# Drop feats to be on the safe side.
train = K2SpeechRecognitionDataset(
cut_transforms=transforms,
input_strategy=OnTheFlyFeatures(
Fbank(FbankConfig(num_mel_bins=80))
),
input_transforms=input_transforms,
return_cuts=self.args.return_cuts,
)
if self.args.bucketing_sampler:
logging.info("Using BucketingSampler.")
train_sampler = BucketingSampler(
cuts_train,
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
bucket_method="equal_duration",
drop_last=True,
)
else:
logging.info("Using SingleCutSampler.")
train_sampler = SingleCutSampler(
cuts_train,
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
)
logging.info("About to create train dataloader")
train_dl = DataLoader(
train,
sampler=train_sampler,
batch_size=None,
num_workers=self.args.num_workers,
persistent_workers=False,
)
return train_dl
def valid_dataloaders(self) -> DataLoader:
logging.info("About to get dev cuts")
cuts_valid = self.valid_cuts()
transforms = []
if self.args.concatenate_cuts:
transforms = [
CutConcatenate(
duration_factor=self.args.duration_factor, gap=self.args.gap
)
] + transforms
logging.info("About to create dev dataset")
if self.args.on_the_fly_feats:
validate = K2SpeechRecognitionDataset(
cut_transforms=transforms,
input_strategy=OnTheFlyFeatures(
Fbank(FbankConfig(num_mel_bins=80))
),
return_cuts=self.args.return_cuts,
)
else:
validate = K2SpeechRecognitionDataset(
cut_transforms=transforms,
return_cuts=self.args.return_cuts,
)
valid_sampler = SingleCutSampler(
cuts_valid,
max_duration=self.args.max_duration,
shuffle=False,
)
logging.info("About to create dev dataloader")
valid_dl = DataLoader(
validate,
sampler=valid_sampler,
batch_size=None,
num_workers=2,
persistent_workers=False,
)
return valid_dl
def test_dataloaders(self) -> Union[DataLoader, List[DataLoader]]:
cuts = self.test_cuts()
is_list = isinstance(cuts, list)
test_loaders = []
if not is_list:
cuts = [cuts]
for cuts_test in cuts:
logging.debug("About to create test dataset")
test = K2SpeechRecognitionDataset(
input_strategy=OnTheFlyFeatures(
Fbank(FbankConfig(num_mel_bins=80))
)
if self.args.on_the_fly_feats
else PrecomputedFeatures(),
return_cuts=self.args.return_cuts,
)
sampler = SingleCutSampler(
cuts_test, max_duration=self.args.max_duration
)
logging.debug("About to create test dataloader")
test_dl = DataLoader(
test, batch_size=None, sampler=sampler, num_workers=1
)
test_loaders.append(test_dl)
if is_list:
return test_loaders
else:
return test_loaders[0]
@lru_cache()
def train_cuts(self) -> CutSet:
logging.info("About to get train cuts")
cuts_train = load_manifest(
self.args.feature_dir / "cuts_TRAIN.json.gz"
)
return cuts_train
@lru_cache()
def valid_cuts(self) -> CutSet:
logging.info("About to get dev cuts")
cuts_valid = load_manifest(
self.args.feature_dir / "cuts_DEV.json.gz"
)
return cuts_valid
@lru_cache()
def test_cuts(self) -> CutSet:
logging.debug("About to get test cuts")
cuts_test = load_manifest(
self.args.feature_dir / "cuts_TEST.json.gz"
)
return cuts_test
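A minimal usage sketch of TimitAsrDataModule, assuming prepare.sh has already produced data/fbank/cuts_TRAIN.json.gz, cuts_DEV.json.gz, cuts_TEST.json.gz and cuts_musan.json.gz, and that the base DataModule adds no extra required command-line arguments; the method names come from the class above.

# Sketch: build the dataloaders and peek at one training batch.
# Assumes the fbank manifests listed above already exist.
import argparse
from asr_datamodule import TimitAsrDataModule

parser = argparse.ArgumentParser()
TimitAsrDataModule.add_arguments(parser)
args = parser.parse_args(["--max-duration", "100"])

timit = TimitAsrDataModule(args)
train_dl = timit.train_dataloaders()
batch = next(iter(train_dl))
print(batch["inputs"].shape)               # (N, T, 80) fbank features
print(len(batch["supervisions"]["text"]))  # number of supervisions in the batch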

View File

@ -0,0 +1,503 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang
# Mingshuang Luo)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import k2
import torch
import torch.nn as nn
from asr_datamodule import TimitAsrDataModule
from model import TdnnLstm
from icefall.checkpoint import average_checkpoints, load_checkpoint
from icefall.decode import (
get_lattice,
nbest_decoding,
one_best_decoding,
rescore_with_n_best_list,
rescore_with_whole_lattice,
)
from icefall.lexicon import Lexicon
from icefall.utils import (
AttributeDict,
get_texts,
setup_logger,
store_transcripts,
str2bool,
write_error_stats,
)
def get_parser():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--epoch",
type=int,
default=19,
help="It specifies the checkpoint to use for decoding."
"Note: Epoch counts from 0.",
)
parser.add_argument(
"--avg",
type=int,
default=5,
help="Number of checkpoints to average. Automatically select "
"consecutive checkpoints before the checkpoint specified by "
"'--epoch'. ",
)
parser.add_argument(
"--method",
type=str,
default="whole-lattice-rescoring",
help="""Decoding method.
Supported values are:
- (1) 1best. Extract the best path from the decoding lattice as the
decoding result.
- (2) nbest. Extract n paths from the decoding lattice; the path
with the highest score is the decoding result.
- (3) nbest-rescoring. Extract n paths from the decoding lattice,
rescore them with an n-gram LM (e.g., a 4-gram LM), the path with
the highest score is the decoding result.
- (4) whole-lattice-rescoring. Rescore the decoding lattice with an
n-gram LM (e.g., a 4-gram LM), the best path of rescored lattice
is the decoding result.
""",
)
parser.add_argument(
"--num-paths",
type=int,
default=100,
help="""Number of paths for n-best based decoding method.
Used only when "method" is one of the following values:
nbest, nbest-rescoring
""",
)
parser.add_argument(
"--nbest-scale",
type=float,
default=0.5,
help="""The scale to be applied to `lattice.scores`.
It's needed if you use any kinds of n-best based rescoring.
Used only when "method" is one of the following values:
nbest, nbest-rescoring
A smaller value results in more unique paths.
""",
)
parser.add_argument(
"--export",
type=str2bool,
default=False,
help="""When enabled, the averaged model is saved to
tdnn_lstm_ctc/exp/pretrained.pt. Note: only model.state_dict() is saved.
pretrained.pt contains a dict {"model": model.state_dict()},
which can be loaded by `icefall.checkpoint.load_checkpoint()`.
""",
)
return parser
def get_params() -> AttributeDict:
params = AttributeDict(
{
"exp_dir": Path("tdnn_lstm_ctc/exp/"),
"lang_dir": Path("data/lang_phone"),
"lm_dir": Path("data/lm"),
"feature_dim": 80,
"subsampling_factor": 3,
"search_beam": 20,
"output_beam": 5,
"min_active_states": 30,
"max_active_states": 10000,
"use_double_scores": True,
}
)
return params
def decode_one_batch(
params: AttributeDict,
model: nn.Module,
HLG: k2.Fsa,
batch: dict,
lexicon: Lexicon,
G: Optional[k2.Fsa] = None,
) -> Dict[str, List[List[str]]]:
"""Decode one batch and return the result in a dict. The dict has the
following format:
- key: It indicates the setting used for decoding. For example,
if no rescoring is used, the key is the string `no_rescore`.
If LM rescoring is used, the key is the string `lm_scale_xxx`,
where `xxx` is the value of `lm_scale`. An example key is
`lm_scale_0.7`
- value: It contains the decoding result. `len(value)` equals to
batch size. `value[i]` is the decoding result for the i-th
utterance in the given batch.
Args:
params:
It's the return value of :func:`get_params`.
- params.method is "1best", it uses 1best decoding without LM rescoring.
- params.method is "nbest", it uses nbest decoding without LM rescoring.
- params.method is "nbest-rescoring", it uses nbest LM rescoring.
- params.method is "whole-lattice-rescoring", it uses whole lattice LM
rescoring.
model:
The neural model.
HLG:
The decoding graph.
batch:
It is the return value from iterating
`lhotse.dataset.K2SpeechRecognitionDataset`. See its documentation
for the format of the `batch`.
lexicon:
It contains word symbol table.
G:
An LM. It is not None when params.method is "nbest-rescoring"
or "whole-lattice-rescoring". In general, the G in HLG
is a 3-gram LM, while this G is a 4-gram LM.
Returns:
Return the decoding result. See above description for the format of
the returned dict.
"""
device = HLG.device
feature = batch["inputs"]
assert feature.ndim == 3
feature = feature.to(device)
# at entry, feature is (N, T, C)
feature = feature.permute(0, 2, 1) # now feature is (N, C, T)
nnet_output = model(feature)
# nnet_output is (N, T, C)
supervisions = batch["supervisions"]
supervision_segments = torch.stack(
(
supervisions["sequence_idx"],
supervisions["start_frame"] // params.subsampling_factor,
supervisions["num_frames"] // params.subsampling_factor,
),
1,
).to(torch.int32)
lattice = get_lattice(
nnet_output=nnet_output,
decoding_graph=HLG,
supervision_segments=supervision_segments,
search_beam=params.search_beam,
output_beam=params.output_beam,
min_active_states=params.min_active_states,
max_active_states=params.max_active_states,
)
if params.method in ["1best", "nbest"]:
if params.method == "1best":
best_path = one_best_decoding(
lattice=lattice, use_double_scores=params.use_double_scores
)
key = "no_rescore"
else:
best_path = nbest_decoding(
lattice=lattice,
num_paths=params.num_paths,
use_double_scores=params.use_double_scores,
nbest_scale=params.nbest_scale,
)
key = f"no_rescore-{params.num_paths}"
hyps = get_texts(best_path)
hyps = [[lexicon.word_table[i] for i in ids] for ids in hyps]
return {key: hyps}
assert params.method in ["nbest-rescoring", "whole-lattice-rescoring"]
lm_scale_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
lm_scale_list += [0.8, 0.9, 1.0, 1.1, 1.2, 1.3]
lm_scale_list += [1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0]
if params.method == "nbest-rescoring":
best_path_dict = rescore_with_n_best_list(
lattice=lattice,
G=G,
num_paths=params.num_paths,
lm_scale_list=lm_scale_list,
nbest_scale=params.nbest_scale,
)
else:
best_path_dict = rescore_with_whole_lattice(
lattice=lattice,
G_with_epsilon_loops=G,
lm_scale_list=lm_scale_list,
)
ans = dict()
for lm_scale_str, best_path in best_path_dict.items():
hyps = get_texts(best_path)
hyps = [[lexicon.word_table[i] for i in ids] for ids in hyps]
ans[lm_scale_str] = hyps
return ans
def decode_dataset(
dl: torch.utils.data.DataLoader,
params: AttributeDict,
model: nn.Module,
HLG: k2.Fsa,
lexicon: Lexicon,
G: Optional[k2.Fsa] = None,
) -> Dict[str, List[Tuple[List[str], List[str]]]]:
"""Decode dataset.
Args:
dl:
PyTorch's dataloader containing the dataset to decode.
params:
It is returned by :func:`get_params`.
model:
The neural model.
HLG:
The decoding graph.
lexicon:
It contains word symbol table.
G:
An LM. It is not None when params.method is "nbest-rescoring"
or "whole-lattice-rescoring". In general, the G in HLG
is a 3-gram LM, while this G is a 4-gram LM.
Returns:
Return a dict, whose key may be "no_rescore" if no LM rescoring
is used, or it may be "lm_scale_0.7" if LM rescoring is used.
Its value is a list of tuples. Each tuple contains two elements:
The first is the reference transcript, and the second is the
predicted result.
"""
results = []
num_cuts = 0
try:
num_batches = len(dl)
except TypeError:
num_batches = "?"
results = defaultdict(list)
for batch_idx, batch in enumerate(dl):
texts = batch["supervisions"]["text"]
hyps_dict = decode_one_batch(
params=params,
model=model,
HLG=HLG,
batch=batch,
lexicon=lexicon,
G=G,
)
for lm_scale, hyps in hyps_dict.items():
this_batch = []
assert len(hyps) == len(texts)
for hyp_words, ref_text in zip(hyps, texts):
ref_words = ref_text.split()
this_batch.append((ref_words, hyp_words))
results[lm_scale].extend(this_batch)
num_cuts += len(batch["supervisions"]["text"])
if batch_idx % 100 == 0:
batch_str = f"{batch_idx}/{num_batches}"
logging.info(
f"batch {batch_str}, cuts processed until now is {num_cuts}"
)
return results
def save_results(
params: AttributeDict,
test_set_name: str,
results_dict: Dict[str, List[Tuple[List[int], List[int]]]],
):
test_set_wers = dict()
for key, results in results_dict.items():
recog_path = params.exp_dir / f"recogs-{test_set_name}-{key}.txt"
store_transcripts(filename=recog_path, texts=results)
logging.info(f"The transcripts are stored in {recog_path}")
# The following prints out PERs, per-phone error statistics and aligned
# ref/hyp pairs.
errs_filename = params.exp_dir / f"errs-{test_set_name}-{key}.txt"
with open(errs_filename, "w") as f:
wer = write_error_stats(f, f"{test_set_name}-{key}", results)
test_set_wers[key] = wer
logging.info("Wrote detailed error stats to {}".format(errs_filename))
test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
errs_info = params.exp_dir / f"per-summary-{test_set_name}.txt"
with open(errs_info, "w") as f:
print("settings\tPER", file=f)
for key, val in test_set_wers:
print("{}\t{}".format(key, val), file=f)
s = "\nFor {}, PER of different settings are:\n".format(test_set_name)
note = "\tbest for {}".format(test_set_name)
for key, val in test_set_wers:
s += "{}\t{}{}\n".format(key, val, note)
note = ""
logging.info(s)
@torch.no_grad()
def main():
parser = get_parser()
TimitAsrDataModule.add_arguments(parser)
args = parser.parse_args()
params = get_params()
params.update(vars(args))
setup_logger(f"{params.exp_dir}/log/log-decode")
logging.info("Decoding started")
logging.info(params)
lexicon = Lexicon(params.lang_dir)
max_phone_id = max(lexicon.tokens)
device = torch.device("cpu")
if torch.cuda.is_available():
device = torch.device("cuda", 0)
logging.info(f"device: {device}")
HLG = k2.Fsa.from_dict(
torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu")
)
HLG = HLG.to(device)
assert HLG.requires_grad is False
if not hasattr(HLG, "lm_scores"):
HLG.lm_scores = HLG.scores.clone()
if params.method in ["nbest-rescoring", "whole-lattice-rescoring"]:
if not (params.lm_dir / "G_4_gram.pt").is_file():
logging.info("Loading G_4_gram.fst.txt")
logging.warning("It may take 8 minutes.")
with open(params.lm_dir / "G_4_gram.fst.txt") as f:
first_word_disambig_id = lexicon.word_table["#0"]
G = k2.Fsa.from_openfst(f.read(), acceptor=False)
# G.aux_labels is not needed in later computations, so
# remove it here.
del G.aux_labels
# CAUTION: The following line is crucial.
# Arcs entering the back-off state have label equal to #0.
# We have to change it to 0 here.
G.labels[G.labels >= first_word_disambig_id] = 0
G = k2.Fsa.from_fsas([G]).to(device)
G = k2.arc_sort(G)
torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
else:
logging.info("Loading pre-compiled G_4_gram.pt")
d = torch.load(params.lm_dir / "G_4_gram.pt", map_location="cpu")
G = k2.Fsa.from_dict(d).to(device)
if params.method == "whole-lattice-rescoring":
# Add epsilon self-loops to G as we will compose
# it with the whole lattice later
G = k2.add_epsilon_self_loops(G)
G = k2.arc_sort(G)
G = G.to(device)
# G.lm_scores is used to replace HLG.lm_scores during
# LM rescoring.
G.lm_scores = G.scores.clone()
else:
G = None
model = TdnnLstm(
num_features=params.feature_dim,
num_classes=max_phone_id + 1, # +1 for the blank symbol
subsampling_factor=params.subsampling_factor,
)
if params.avg == 1:
load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model)
#load_checkpoint(f"tmp/icefall_asr_librispeech_tdnn-lstm_ctc/exp/pretrained.pt", model)
else:
start = params.epoch - params.avg + 1
filenames = []
for i in range(start, params.epoch + 1):
if i >= 0:
filenames.append(f"{params.exp_dir}/epoch-{i}.pt")
logging.info(f"averaging {filenames}")
model.load_state_dict(average_checkpoints(filenames))
if params.export:
logging.info(f"Export averaged model to {params.exp_dir}/pretrained.pt")
torch.save(
{"model": model.state_dict()}, f"{params.exp_dir}/pretrained.pt"
)
return
model.to(device)
model.eval()
timit = TimitAsrDataModule(args)
# TIMIT provides a single test set, labelled "TEST" in the manifests.
test_set = "TEST"
test_dl = timit.test_dataloaders()
results_dict = decode_dataset(
dl=test_dl,
params=params,
model=model,
HLG=HLG,
lexicon=lexicon,
G=G,
)
save_results(
params=params, test_set_name=test_set, results_dict=results_dict
)
logging.info("Done!")
if __name__ == "__main__":
main()
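decode_dataset returns a dict keyed by the decoding setting (e.g. "lm_scale_0.3"), each value being a list of (ref_words, hyp_words) pairs; save_results feeds these to write_error_stats to obtain the PERs reported in RESULTS.md. The sketch below only illustrates that structure with made-up pairs and a rough error count; it is not the recipe's scoring code.

# Sketch: a rough per-setting error rate over the structure returned by
# decode_dataset(). The pairs below are made up; the recipe's real numbers
# come from write_error_stats in save_results().
from difflib import SequenceMatcher

results_dict = {
    "lm_scale_0.2": [(["sil", "aa", "b"], ["sil", "aa"]),
                     (["b", "ih"], ["b", "ih"])],
}

for key, pairs in sorted(results_dict.items()):
    errs = tot = 0
    for ref, hyp in pairs:
        matches = sum(
            blk.size for blk in SequenceMatcher(a=ref, b=hyp).get_matching_blocks()
        )
        errs += max(len(ref), len(hyp)) - matches  # crude error count
        tot += len(ref)
    print(f"{key}: rough PER {100.0 * errs / tot:.2f}%")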

View File

@ -0,0 +1,103 @@
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import torch.nn as nn
class TdnnLstm(nn.Module):
def __init__(
self, num_features: int, num_classes: int, subsampling_factor: int = 3
) -> None:
"""
Args:
num_features:
The input dimension of the model.
num_classes:
The output dimension of the model.
subsampling_factor:
It reduces the number of output frames by this factor.
"""
super().__init__()
self.num_features = num_features
self.num_classes = num_classes
self.subsampling_factor = subsampling_factor
self.tdnn = nn.Sequential(
nn.Conv1d(
in_channels=num_features,
out_channels=500,
kernel_size=3,
stride=1,
padding=1,
),
nn.ReLU(inplace=True),
nn.BatchNorm1d(num_features=500, affine=False),
nn.Conv1d(
in_channels=500,
out_channels=500,
kernel_size=3,
stride=1,
padding=1,
),
nn.ReLU(inplace=True),
nn.BatchNorm1d(num_features=500, affine=False),
nn.Conv1d(
in_channels=500,
out_channels=500,
kernel_size=3,
stride=self.subsampling_factor, # stride: subsampling_factor!
padding=1,
),
nn.ReLU(inplace=True),
nn.BatchNorm1d(num_features=500, affine=False),
)
self.lstms = nn.ModuleList(
[
nn.LSTM(input_size=500, hidden_size=500, num_layers=1)
for _ in range(5)
]
)
self.lstm_bnorms = nn.ModuleList(
[nn.BatchNorm1d(num_features=500, affine=False) for _ in range(5)]
)
self.dropout = nn.Dropout(0.2)
self.linear = nn.Linear(in_features=500, out_features=self.num_classes)
def forward(self, x: torch.Tensor) -> torch.Tensor:
"""
Args:
x:
Its shape is [N, C, T]
Returns:
The output tensor has shape [N, T, C]
"""
x = self.tdnn(x)
x = x.permute(2, 0, 1) # (N, C, T) -> (T, N, C) -> how LSTM expects it
for lstm, bnorm in zip(self.lstms, self.lstm_bnorms):
x_new, _ = lstm(x)
x_new = bnorm(x_new.permute(1, 2, 0)).permute(
2, 0, 1
) # (T, N, C) -> (N, C, T) -> (T, N, C)
x_new = self.dropout(x_new)
x = x_new + x # skip connections
x = x.transpose(
1, 0
) # (T, N, C) -> (N, T, C) -> linear expects "features" in the last dim
x = self.linear(x)
x = nn.functional.log_softmax(x, dim=-1)
return x
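A quick shape check for TdnnLstm, assuming the recipe's 80-dim fbank features and an illustrative num_classes of 40 (in decode.py it is max_phone_id + 1): the input is (N, C, T) and the output is (N, T', num_classes), with T' reduced by the strided Conv1d.

# Sketch: verify the TdnnLstm shape contract from the docstrings:
# input (N, C, T), output (N, T', num_classes), where T' comes from the
# stride-3 Conv1d (T' = 100 for T = 300 here).
import torch
from model import TdnnLstm  # the model defined above

model = TdnnLstm(num_features=80, num_classes=40, subsampling_factor=3)
model.eval()
x = torch.randn(4, 80, 300)  # (N, C, T): 4 utterances, 80 fbank bins, 300 frames
with torch.no_grad():
    y = model(x)
print(y.shape)  # torch.Size([4, 100, 40])
assert y.shape == (4, 100, 40)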

View File

@ -0,0 +1,595 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang
# Mingshuang Luo)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from pathlib import Path
from shutil import copyfile
from typing import Optional, Tuple
import k2
import torch
import torch.multiprocessing as mp
import torch.nn as nn
import torch.optim as optim
from asr_datamodule import TimitAsrDataModule
from lhotse.utils import fix_random_seed
from model import TdnnLstm
from torch import Tensor
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import StepLR
from torch.utils.tensorboard import SummaryWriter
from icefall.checkpoint import load_checkpoint
from icefall.checkpoint import save_checkpoint as save_checkpoint_impl
from icefall.dist import cleanup_dist, setup_dist
from icefall.graph_compiler import CtcTrainingGraphCompiler
from icefall.lexicon import Lexicon
from icefall.utils import (
AttributeDict,
MetricsTracker,
encode_supervisions,
get_env_info,
setup_logger,
str2bool,
)
def get_parser():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--world-size",
type=int,
default=1,
help="Number of GPUs for DDP training.",
)
parser.add_argument(
"--master-port",
type=int,
default=12354,
help="Master port to use for DDP training.",
)
parser.add_argument(
"--tensorboard",
type=str2bool,
default=True,
help="Should various information be logged in tensorboard.",
)
parser.add_argument(
"--num-epochs",
type=int,
default=20,
help="Number of epochs to train.",
)
parser.add_argument(
"--start-epoch",
type=int,
default=0,
help="""Resume training from from this epoch.
If it is positive, it will load checkpoint from
tdnn_lstm_ctc/exp/epoch-{start_epoch-1}.pt
""",
)
return parser
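# Illustrative note (not part of the recipe): with no command-line flags the
# parser above yields world_size=1, num_epochs=20 and start_epoch=0, i.e.
# single-GPU training from scratch. Passing, e.g., --start-epoch=10 makes
# load_checkpoint_if_available() below resume from
# tdnn_lstm_ctc/exp/epoch-9.pt before training continues.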
def get_params() -> AttributeDict:
"""Return a dict containing training parameters.
    All training-related parameters that are not passed from the command line
    are saved in the variable `params`.
    Command-line options are merged into `params` after they are parsed, so
you can also access them via `params`.
Explanation of options saved in `params`:
        - exp_dir: It specifies the directory where all training-related
                   files, e.g., checkpoints, logs, etc., are saved
- lang_dir: It contains language related input files such as
"lexicon.txt"
- lr: It specifies the initial learning rate
- feature_dim: The model input dim. It has to match the one used
in computing features.
- weight_decay: The weight_decay for the optimizer.
- subsampling_factor: The subsampling factor for the model.
- best_train_loss: Best training loss so far. It is used to select
the model that has the lowest training loss. It is
updated during the training.
- best_valid_loss: Best validation loss so far. It is used to select
the model that has the lowest validation loss. It is
updated during the training.
- best_train_epoch: It is the epoch that has the best training loss.
- best_valid_epoch: It is the epoch that has the best validation loss.
        - batch_idx_train: Used for writing statistics to tensorboard. It
                           contains the number of batches trained so far
                           across epochs.
        - log_interval: Print training loss if `batch_idx % log_interval` is 0
        - reset_interval: Reset statistics if `batch_idx % reset_interval` is 0
        - valid_interval: Run validation if `batch_idx % valid_interval` is 0
- beam_size: It is used in k2.ctc_loss
- reduction: It is used in k2.ctc_loss
- use_double_scores: It is used in k2.ctc_loss
"""
params = AttributeDict(
{
"exp_dir": Path("tdnn_lstm_ctc/exp"),
"lang_dir": Path("data/lang_phone"),
"lr": 1e-3,
"feature_dim": 80,
"weight_decay": 5e-4,
"subsampling_factor": 3,
"best_train_loss": float("inf"),
"best_valid_loss": float("inf"),
"best_train_epoch": -1,
"best_valid_epoch": -1,
"batch_idx_train": 0,
"log_interval": 10,
"reset_interval": 200,
"valid_interval": 1000,
"beam_size": 10,
"reduction": "sum",
"use_double_scores": True,
"env_info": get_env_info(),
}
)
return params
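# Illustrative sketch (assumes AttributeDict supports attribute-style access,
# as it is used throughout this file):
#
#   params = get_params()
#   params.update(vars(get_parser().parse_args([])))
#   assert params.lr == 1e-3        # value defined in get_params()
#   assert params.num_epochs == 20  # command-line default, also reachable via params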
def load_checkpoint_if_available(
params: AttributeDict,
model: nn.Module,
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None,
) -> Optional[dict]:
"""Load checkpoint from file.
    If params.start_epoch is positive, it will load the checkpoint saved at
    epoch `params.start_epoch - 1`. Otherwise, this function does nothing.
Apart from loading state dict for `model`, `optimizer` and `scheduler`,
it also updates `best_train_epoch`, `best_train_loss`, `best_valid_epoch`,
and `best_valid_loss` in `params`.
Args:
params:
The return value of :func:`get_params`.
model:
The training model.
optimizer:
The optimizer that we are using.
scheduler:
The learning rate scheduler we are using.
    Returns:
      Return the contents of the loaded checkpoint as a dict if a checkpoint
      is loaded; return None otherwise.
"""
if params.start_epoch <= 0:
return
filename = params.exp_dir / f"epoch-{params.start_epoch-1}.pt"
saved_params = load_checkpoint(
filename,
model=model,
optimizer=optimizer,
scheduler=scheduler,
)
keys = [
"best_train_epoch",
"best_valid_epoch",
"batch_idx_train",
"best_train_loss",
"best_valid_loss",
]
for k in keys:
params[k] = saved_params[k]
return saved_params
def save_checkpoint(
params: AttributeDict,
model: nn.Module,
optimizer: torch.optim.Optimizer,
scheduler: torch.optim.lr_scheduler._LRScheduler,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
Args:
params:
It is returned by :func:`get_params`.
model:
The training model.
"""
if rank != 0:
return
filename = params.exp_dir / f"epoch-{params.cur_epoch}.pt"
save_checkpoint_impl(
filename=filename,
model=model,
params=params,
optimizer=optimizer,
scheduler=scheduler,
rank=rank,
)
if params.best_train_epoch == params.cur_epoch:
best_train_filename = params.exp_dir / "best-train-loss.pt"
copyfile(src=filename, dst=best_train_filename)
if params.best_valid_epoch == params.cur_epoch:
best_valid_filename = params.exp_dir / "best-valid-loss.pt"
copyfile(src=filename, dst=best_valid_filename)
def compute_loss(
params: AttributeDict,
model: nn.Module,
batch: dict,
graph_compiler: CtcTrainingGraphCompiler,
is_training: bool,
) -> Tuple[Tensor, MetricsTracker]:
"""
Compute CTC loss given the model and its inputs.
Args:
params:
Parameters for training. See :func:`get_params`.
model:
The model for training. It is an instance of TdnnLstm in our case.
batch:
A batch of data. See `lhotse.dataset.K2SpeechRecognitionDataset()`
for the content in it.
graph_compiler:
It is used to build a decoding graph from a ctc topo and training
transcript. The training transcript is contained in the given `batch`,
while the ctc topo is built when this compiler is instantiated.
is_training:
True for training. False for validation. When it is True, this
function enables autograd during computation; when it is False, it
disables autograd.
"""
device = graph_compiler.device
feature = batch["inputs"]
# at entry, feature is (N, T, C)
feature = feature.permute(0, 2, 1) # now feature is (N, C, T)
assert feature.ndim == 3
feature = feature.to(device)
with torch.set_grad_enabled(is_training):
nnet_output = model(feature)
# nnet_output is (N, T, C)
# NOTE: We need `encode_supervisions` to sort sequences with
# different duration in decreasing order, required by
# `k2.intersect_dense` called in `k2.ctc_loss`
supervisions = batch["supervisions"]
supervision_segments, texts = encode_supervisions(
supervisions, subsampling_factor=params.subsampling_factor
)
decoding_graph = graph_compiler.compile(texts)
dense_fsa_vec = k2.DenseFsaVec(
nnet_output,
supervision_segments,
allow_truncate=params.subsampling_factor - 1,
)
loss = k2.ctc_loss(
decoding_graph=decoding_graph,
dense_fsa_vec=dense_fsa_vec,
output_beam=params.beam_size,
reduction=params.reduction,
use_double_scores=params.use_double_scores,
)
assert loss.requires_grad == is_training
info = MetricsTracker()
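    # Each row of `supervision_segments` is (sequence_index, start_frame,
    # num_frames) after subsampling, so summing column 2 gives the total
    # number of frames in this batch; it is used to normalize the summed loss.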
info["frames"] = supervision_segments[:, 2].sum().item()
info["loss"] = loss.detach().cpu().item()
return loss, info
def compute_validation_loss(
params: AttributeDict,
model: nn.Module,
graph_compiler: CtcTrainingGraphCompiler,
valid_dl: torch.utils.data.DataLoader,
world_size: int = 1,
) -> MetricsTracker:
"""Run the validation process. The validation loss
is saved in `params.valid_loss`.
"""
model.eval()
tot_loss = MetricsTracker()
for batch_idx, batch in enumerate(valid_dl):
loss, loss_info = compute_loss(
params=params,
model=model,
batch=batch,
graph_compiler=graph_compiler,
is_training=False,
)
assert loss.requires_grad is False
tot_loss = tot_loss + loss_info
if world_size > 1:
tot_loss.reduce(loss.device)
loss_value = tot_loss["loss"] / tot_loss["frames"]
if loss_value < params.best_valid_loss:
params.best_valid_epoch = params.cur_epoch
params.best_valid_loss = loss_value
return tot_loss
def train_one_epoch(
params: AttributeDict,
model: nn.Module,
optimizer: torch.optim.Optimizer,
graph_compiler: CtcTrainingGraphCompiler,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
) -> None:
"""Train the model for one epoch.
    The training loss, averaged over all frames, is saved in
`params.train_loss`. It runs the validation process every
`params.valid_interval` batches.
Args:
params:
It is returned by :func:`get_params`.
model:
The model for training.
optimizer:
The optimizer we are using.
graph_compiler:
It is used to convert transcripts to FSAs.
train_dl:
Dataloader for the training dataset.
valid_dl:
Dataloader for the validation dataset.
tb_writer:
Writer to write log messages to tensorboard.
world_size:
Number of nodes in DDP training. If it is 1, DDP is disabled.
"""
model.train()
tot_loss = MetricsTracker()
for batch_idx, batch in enumerate(train_dl):
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
loss, loss_info = compute_loss(
params=params,
model=model,
batch=batch,
graph_compiler=graph_compiler,
is_training=True,
)
# summary stats.
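        # `tot_loss` decays its previous value by (1 - 1/reset_interval) on
        # every batch, so it is roughly a moving average over the last
        # `reset_interval` (200) batches rather than over the whole epoch.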
tot_loss = (tot_loss * (1 - 1 / params.reset_interval)) + loss_info
optimizer.zero_grad()
loss.backward()
clip_grad_norm_(model.parameters(), 5.0, 2.0)
optimizer.step()
if batch_idx % params.log_interval == 0:
logging.info(
f"Epoch {params.cur_epoch}, "
f"batch {batch_idx}, loss[{loss_info}], "
f"tot_loss[{tot_loss}], batch size: {batch_size}"
)
if batch_idx % params.log_interval == 0:
if tb_writer is not None:
loss_info.write_summary(
tb_writer, "train/current_", params.batch_idx_train
)
tot_loss.write_summary(
tb_writer, "train/tot_", params.batch_idx_train
)
if batch_idx > 0 and batch_idx % params.valid_interval == 0:
valid_info = compute_validation_loss(
params=params,
model=model,
graph_compiler=graph_compiler,
valid_dl=valid_dl,
world_size=world_size,
)
model.train()
logging.info(f"Epoch {params.cur_epoch}, validation {valid_info}")
if tb_writer is not None:
valid_info.write_summary(
tb_writer,
"train/valid_",
params.batch_idx_train,
)
loss_value = tot_loss["loss"] / tot_loss["frames"]
params.train_loss = loss_value
if params.train_loss < params.best_train_loss:
params.best_train_epoch = params.cur_epoch
params.best_train_loss = params.train_loss
def run(rank, world_size, args):
"""
Args:
rank:
It is a value between 0 and `world_size-1`, which is
passed automatically by `mp.spawn()` in :func:`main`.
        The node with rank 0 is responsible for saving checkpoints.
world_size:
Number of GPUs for DDP training.
args:
The return value of get_parser().parse_args()
"""
params = get_params()
params.update(vars(args))
fix_random_seed(42)
if world_size > 1:
setup_dist(rank, world_size, params.master_port)
setup_logger(f"{params.exp_dir}/log/log-train")
logging.info("Training started")
logging.info(params)
if args.tensorboard and rank == 0:
tb_writer = SummaryWriter(log_dir=f"{params.exp_dir}/tensorboard")
else:
tb_writer = None
lexicon = Lexicon(params.lang_dir)
max_phone_id = max(lexicon.tokens)
device = torch.device("cpu")
if torch.cuda.is_available():
device = torch.device("cuda", rank)
graph_compiler = CtcTrainingGraphCompiler(lexicon=lexicon, device=device)
model = TdnnLstm(
num_features=params.feature_dim,
num_classes=max_phone_id + 1, # +1 for the blank symbol
subsampling_factor=params.subsampling_factor,
)
checkpoints = load_checkpoint_if_available(params=params, model=model)
model.to(device)
if world_size > 1:
model = DDP(model, device_ids=[rank])
optimizer = optim.AdamW(
model.parameters(),
lr=params.lr,
weight_decay=params.weight_decay,
)
scheduler = StepLR(optimizer, step_size=8, gamma=0.8)
if checkpoints:
optimizer.load_state_dict(checkpoints["optimizer"])
scheduler.load_state_dict(checkpoints["scheduler"])
timit = TimitAsrDataModule(args)
train_dl = timit.train_dataloaders()
valid_dl = timit.valid_dataloaders()
for epoch in range(params.start_epoch, params.num_epochs):
train_dl.sampler.set_epoch(epoch)
if epoch > params.start_epoch:
logging.info(f"epoch {epoch}, lr: {scheduler.get_last_lr()[0]}")
if tb_writer is not None:
tb_writer.add_scalar(
"train/lr",
scheduler.get_last_lr()[0],
params.batch_idx_train,
)
tb_writer.add_scalar("train/epoch", epoch, params.batch_idx_train)
params.cur_epoch = epoch
train_one_epoch(
params=params,
model=model,
optimizer=optimizer,
graph_compiler=graph_compiler,
train_dl=train_dl,
valid_dl=valid_dl,
tb_writer=tb_writer,
world_size=world_size,
)
scheduler.step()
save_checkpoint(
params=params,
model=model,
optimizer=optimizer,
scheduler=scheduler,
rank=rank,
)
logging.info("Done!")
if world_size > 1:
torch.distributed.barrier()
cleanup_dist()
def main():
parser = get_parser()
TimitAsrDataModule.add_arguments(parser)
args = parser.parse_args()
world_size = args.world_size
assert world_size >= 1
if world_size > 1:
mp.spawn(run, args=(world_size, args), nprocs=world_size, join=True)
else:
run(rank=0, world_size=1, args=args)
if __name__ == "__main__":
main()
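
Usage note (the script path is inferred from `exp_dir` above and may differ in the actual recipe layout): a single-GPU run would presumably be launched as

  CUDA_VISIBLE_DEVICES='0' python tdnn_lstm_ctc/train.py --world-size=1 --num-epochs=20

and a two-GPU DDP run as

  CUDA_VISIBLE_DEVICES='0,1' python tdnn_lstm_ctc/train.py --world-size=2

in which case `mp.spawn()` in `main()` starts one `run()` process per GPU and only rank 0 saves checkpoints and writes tensorboard logs.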