mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
Rand combine update result (#467)
* update RESULTS.md
* fix test code in pruned_transducer_stateless5/conformer.py
* minor fix
* delete doc
* fix style
This commit is contained in:
parent
6c69c4e253
commit
ce26495238
@ -556,9 +556,9 @@ Number of model parameters 118129516 (i.e, 118.13 M).
|
|||||||
|
|
||||||
| | test-clean | test-other | comment |
|
| | test-clean | test-other | comment |
|
||||||
|-------------------------------------|------------|------------|----------------------------------------|
|
|-------------------------------------|------------|------------|----------------------------------------|
|
||||||
| greedy search (max sym per frame 1) | 2.39 | 5.57 | --epoch 39 --avg 7 --max-duration 600 |
|
| greedy search (max sym per frame 1) | 2.43 | 5.72 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
| modified beam search | 2.35 | 5.50 | --epoch 39 --avg 7 --max-duration 600 |
|
| modified beam search | 2.43 | 5.69 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
| fast beam search | 2.38 | 5.50 | --epoch 39 --avg 7 --max-duration 600 |
|
| fast beam search | 2.43 | 5.67 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
|
|
||||||
The training commands are:
|
The training commands are:
|
||||||
|
|
||||||
@ -567,8 +567,8 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
|||||||
|
|
||||||
./pruned_transducer_stateless5/train.py \
|
./pruned_transducer_stateless5/train.py \
|
||||||
--world-size 8 \
|
--world-size 8 \
|
||||||
--num-epochs 40 \
|
--num-epochs 30 \
|
||||||
--start-epoch 0 \
|
--start-epoch 1 \
|
||||||
--full-libri 1 \
|
--full-libri 1 \
|
||||||
--exp-dir pruned_transducer_stateless5/exp-L \
|
--exp-dir pruned_transducer_stateless5/exp-L \
|
||||||
--max-duration 300 \
|
--max-duration 300 \
|
||||||
@ -582,15 +582,15 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
|||||||
```
|
```
|
||||||
|
|
||||||
The tensorboard log can be found at
|
The tensorboard log can be found at
|
||||||
<https://tensorboard.dev/experiment/Zq0h3KpnQ2igWbeR4U82Pw/>
|
<https://tensorboard.dev/experiment/aWzDj5swSE2VmcOYgoe3vQ>
|
||||||
|
|
||||||
The decoding commands are:
|
The decoding commands are:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
for method in greedy_search modified_beam_search fast_beam_search; do
|
for method in greedy_search modified_beam_search fast_beam_search; do
|
||||||
./pruned_transducer_stateless5/decode.py \
|
./pruned_transducer_stateless5/decode.py \
|
||||||
--epoch 39 \
|
--epoch 30 \
|
||||||
--avg 7 \
|
--avg 10 \
|
||||||
--exp-dir ./pruned_transducer_stateless5/exp-L \
|
--exp-dir ./pruned_transducer_stateless5/exp-L \
|
||||||
--max-duration 600 \
|
--max-duration 600 \
|
||||||
--decoding-method $method \
|
--decoding-method $method \
|
||||||
@ -600,13 +600,14 @@ for method in greedy_search modified_beam_search fast_beam_search; do
|
|||||||
--nhead 8 \
|
--nhead 8 \
|
||||||
--encoder-dim 512 \
|
--encoder-dim 512 \
|
||||||
--decoder-dim 512 \
|
--decoder-dim 512 \
|
||||||
--joiner-dim 512
|
--joiner-dim 512 \
|
||||||
|
--use-averaged-model True
|
||||||
done
|
done
|
||||||
```
|
```
|
||||||
|
|
||||||
You can find a pretrained model, training logs, decoding logs, and decoding
|
You can find a pretrained model, training logs, decoding logs, and decoding
|
||||||
results at:
|
results at:
|
||||||
<https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13>
|
<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless5-2022-07-07>
|
||||||
|
|
||||||
|
|
||||||
#### Medium
|
#### Medium
|
||||||
@ -615,9 +616,9 @@ Number of model parameters 30896748 (i.e, 30.9 M).
|
|||||||
|
|
||||||
| | test-clean | test-other | comment |
|
| | test-clean | test-other | comment |
|
||||||
|-------------------------------------|------------|------------|-----------------------------------------|
|
|-------------------------------------|------------|------------|-----------------------------------------|
|
||||||
| greedy search (max sym per frame 1) | 2.88 | 6.69 | --epoch 39 --avg 17 --max-duration 600 |
|
| greedy search (max sym per frame 1) | 2.87 | 6.92 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
| modified beam search | 2.83 | 6.59 | --epoch 39 --avg 17 --max-duration 600 |
|
| modified beam search | 2.83 | 6.75 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
| fast beam search | 2.83 | 6.61 | --epoch 39 --avg 17 --max-duration 600 |
|
| fast beam search | 2.81 | 6.76 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
|
|
||||||
The training commands are:
|
The training commands are:
|
||||||
|
|
||||||
@ -626,8 +627,8 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
|||||||
|
|
||||||
./pruned_transducer_stateless5/train.py \
|
./pruned_transducer_stateless5/train.py \
|
||||||
--world-size 8 \
|
--world-size 8 \
|
||||||
--num-epochs 40 \
|
--num-epochs 30 \
|
||||||
--start-epoch 0 \
|
--start-epoch 1 \
|
||||||
--full-libri 1 \
|
--full-libri 1 \
|
||||||
--exp-dir pruned_transducer_stateless5/exp-M \
|
--exp-dir pruned_transducer_stateless5/exp-M \
|
||||||
--max-duration 300 \
|
--max-duration 300 \
|
||||||
@ -641,15 +642,15 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
|||||||
```
|
```
|
||||||
|
|
||||||
The tensorboard log can be found at
|
The tensorboard log can be found at
|
||||||
<https://tensorboard.dev/experiment/bOQvULPsQ1iL7xpdI0VbXw/>
|
<https://tensorboard.dev/experiment/04xtWUKPRmebSnpzN1GMHQ>
|
||||||
|
|
||||||
The decoding commands are:
|
The decoding commands are:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
for method in greedy_search modified_beam_search fast_beam_search; do
|
for method in greedy_search modified_beam_search fast_beam_search; do
|
||||||
./pruned_transducer_stateless5/decode.py \
|
./pruned_transducer_stateless5/decode.py \
|
||||||
--epoch 39 \
|
--epoch 30 \
|
||||||
--avg 17 \
|
--avg 10 \
|
||||||
--exp-dir ./pruned_transducer_stateless5/exp-M \
|
--exp-dir ./pruned_transducer_stateless5/exp-M \
|
||||||
--max-duration 600 \
|
--max-duration 600 \
|
||||||
--decoding-method $method \
|
--decoding-method $method \
|
||||||
@ -659,13 +660,14 @@ for method in greedy_search modified_beam_search fast_beam_search; do
|
|||||||
--nhead 4 \
|
--nhead 4 \
|
||||||
--encoder-dim 256 \
|
--encoder-dim 256 \
|
||||||
--decoder-dim 512 \
|
--decoder-dim 512 \
|
||||||
--joiner-dim 512
|
--joiner-dim 512 \
|
||||||
|
--use-averaged-model True
|
||||||
done
|
done
|
||||||
```
|
```
|
||||||
|
|
||||||
You can find a pretrained model, training logs, decoding logs, and decoding
|
You can find a pretrained model, training logs, decoding logs, and decoding
|
||||||
results at:
|
results at:
|
||||||
<https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-M-2022-05-13>
|
<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless5-M-2022-07-07>
|
||||||
|
|
||||||
|
|
||||||
#### Baseline-2
|
#### Baseline-2
|
||||||
@ -675,19 +677,19 @@ layers (24 v.s 12) but a narrower model (1536 feedforward dim and 384 encoder di
|
|||||||
|
|
||||||
| | test-clean | test-other | comment |
|
| | test-clean | test-other | comment |
|
||||||
|-------------------------------------|------------|------------|-----------------------------------------|
|
|-------------------------------------|------------|------------|-----------------------------------------|
|
||||||
| greedy search (max sym per frame 1) | 2.41 | 5.70 | --epoch 31 --avg 17 --max-duration 600 |
|
| greedy search (max sym per frame 1) | 2.54 | 5.72 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
| modified beam search | 2.41 | 5.69 | --epoch 31 --avg 17 --max-duration 600 |
|
| modified beam search | 2.47 | 5.71 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
| fast beam search | 2.41 | 5.69 | --epoch 31 --avg 17 --max-duration 600 |
|
| fast beam search | 2.50 | 5.72 | --epoch 30 --avg 10 --max-duration 600 |
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
||||||
|
|
||||||
./pruned_transducer_stateless5/train.py \
|
./pruned_transducer_stateless5/train.py \
|
||||||
--world-size 8 \
|
--world-size 8 \
|
||||||
--num-epochs 40 \
|
--num-epochs 30 \
|
||||||
--start-epoch 0 \
|
--start-epoch 1 \
|
||||||
--full-libri 1 \
|
--full-libri 1 \
|
||||||
--exp-dir pruned_transducer_stateless5/exp \
|
--exp-dir pruned_transducer_stateless5/exp-B \
|
||||||
--max-duration 300 \
|
--max-duration 300 \
|
||||||
--use-fp16 0 \
|
--use-fp16 0 \
|
||||||
--num-encoder-layers 24 \
|
--num-encoder-layers 24 \
|
||||||
@ -699,19 +701,16 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
|||||||
```
|
```
|
||||||
|
|
||||||
The tensorboard log can be found at
|
The tensorboard log can be found at
|
||||||
<https://tensorboard.dev/experiment/73oY9U1mQiq0tbbcovZplw/>
|
<https://tensorboard.dev/experiment/foVHNyqiRi2LhybmRUOAyg>
|
||||||
|
|
||||||
**Caution**: The training script is updated so that epochs are counted from 1
|
|
||||||
after the training.
|
|
||||||
|
|
||||||
The decoding commands are:
|
The decoding commands are:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
for method in greedy_search modified_beam_search fast_beam_search; do
|
for method in greedy_search modified_beam_search fast_beam_search; do
|
||||||
./pruned_transducer_stateless5/decode.py \
|
./pruned_transducer_stateless5/decode.py \
|
||||||
--epoch 31 \
|
--epoch 30 \
|
||||||
--avg 17 \
|
--avg 10 \
|
||||||
--exp-dir ./pruned_transducer_stateless5/exp-M \
|
--exp-dir ./pruned_transducer_stateless5/exp-B \
|
||||||
--max-duration 600 \
|
--max-duration 600 \
|
||||||
--decoding-method $method \
|
--decoding-method $method \
|
||||||
--max-sym-per-frame 1 \
|
--max-sym-per-frame 1 \
|
||||||
@ -720,13 +719,14 @@ for method in greedy_search modified_beam_search fast_beam_search; do
|
|||||||
--nhead 8 \
|
--nhead 8 \
|
||||||
--encoder-dim 384 \
|
--encoder-dim 384 \
|
||||||
--decoder-dim 512 \
|
--decoder-dim 512 \
|
||||||
--joiner-dim 512
|
--joiner-dim 512 \
|
||||||
|
--use-averaged-model True
|
||||||
done
|
done
|
||||||
```
|
```
|
||||||
|
|
||||||
You can find a pretrained model, training logs, decoding logs, and decoding
|
You can find a pretrained model, training logs, decoding logs, and decoding
|
||||||
results at:
|
results at:
|
||||||
<https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-narrower-2022-05-13>
|
<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless5-B-2022-07-07>
|
||||||
|
|
||||||
|
|
||||||
### LibriSpeech BPE training results (Pruned Stateless Transducer 4)
|
### LibriSpeech BPE training results (Pruned Stateless Transducer 4)
|
||||||
|
@ -1064,10 +1064,6 @@ class RandomCombine(nn.Module):
|
|||||||
is a random combination of all the inputs; but which in test time
|
is a random combination of all the inputs; but which in test time
|
||||||
will be just the last input.
|
will be just the last input.
|
||||||
|
|
||||||
All but the last input will have a linear transform before we
|
|
||||||
randomly combine them; these linear transforms will be initialized
|
|
||||||
to the identity transform.
|
|
||||||
|
|
||||||
The idea is that the list of Tensors will be a list of outputs of multiple
|
The idea is that the list of Tensors will be a list of outputs of multiple
|
||||||
conformer layers. This has a similar effect as iterated loss. (See:
|
conformer layers. This has a similar effect as iterated loss. (See:
|
||||||
DEJA-VU: DOUBLE FEATURE PRESENTATION AND ITERATED LOSS IN DEEP TRANSFORMER
|
DEJA-VU: DOUBLE FEATURE PRESENTATION AND ITERATED LOSS IN DEEP TRANSFORMER
|
||||||
@ -1267,7 +1263,6 @@ def _test_random_combine(final_weight: float, pure_prob: float, stddev: float):
|
|||||||
num_channels = 50
|
num_channels = 50
|
||||||
m = RandomCombine(
|
m = RandomCombine(
|
||||||
num_inputs=num_inputs,
|
num_inputs=num_inputs,
|
||||||
num_channels=num_channels,
|
|
||||||
final_weight=final_weight,
|
final_weight=final_weight,
|
||||||
pure_prob=pure_prob,
|
pure_prob=pure_prob,
|
||||||
stddev=stddev,
|
stddev=stddev,
|
||||||
@ -1289,9 +1284,7 @@ def _test_random_combine_main():
|
|||||||
_test_random_combine(0.5, 0.5, 0.3)
|
_test_random_combine(0.5, 0.5, 0.3)
|
||||||
|
|
||||||
feature_dim = 50
|
feature_dim = 50
|
||||||
c = Conformer(
|
c = Conformer(num_features=feature_dim, d_model=128, nhead=4)
|
||||||
num_features=feature_dim, output_dim=256, d_model=128, nhead=4
|
|
||||||
)
|
|
||||||
batch_size = 5
|
batch_size = 5
|
||||||
seq_len = 20
|
seq_len = 20
|
||||||
# Just make sure the forward pass runs.
|
# Just make sure the forward pass runs.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user