Add doc about how to check and use extracted alignments.

This commit is contained in:
Fangjun Kuang 2021-10-18 14:53:32 +08:00
parent d7023c3c4b
commit f383666c40
2 changed files with 30 additions and 6 deletions

View File

@ -51,3 +51,27 @@ in `conformer_ctc/train.py`.
Search `./conformer_ctc/asr_datamodule.py` for `preserve_id`. Search `./conformer_ctc/asr_datamodule.py` for `preserve_id`.
**TODO:** Add doc about how to use the extracted alignment in the other pull-request. **TODO:** Add doc about how to use the extracted alignment in the other pull-request.
### Step 3: Check your extracted alignments
There is a file `test_ali.py` in `icefall/test` that can be used to test your
alignments. It uses pre-computed alignments to modify a randomly generated
`nnet_output` and it checks that we can decode the correct transcripts
from the resulting `nnet_output`.
You should get something like the following if you run that script:
```
$ ./test/test_ali.py
['THE GOOD NATURED AUDIENCE IN PITY TO FALLEN MAJESTY SHOWED FOR ONCE GREATER DEFERENCE TO THE KING THAN TO THE MINISTER AND SUNG THE PSALM WHICH THE FORMER HAD CALLED FOR', 'THE OLD SERVANT TOLD HIM QUIETLY AS THEY CREPT BACK TO DWELL THAT THIS PASSAGE THAT LED FROM THE HUT IN THE PLEASANCE TO SHERWOOD AND THAT GEOFFREY FOR THE TIME WAS HIDING WITH THE OUTLAWS IN THE FOREST', 'FOR A WHILE SHE LAY IN HER CHAIR IN HAPPY DREAMY PLEASURE AT SUN AND BIRD AND TREE', "BUT THE ESSENCE OF LUTHER'S LECTURES IS THERE"]
['THE GOOD NATURED AUDIENCE IN PITY TO FALLEN MAJESTY SHOWED FOR ONCE GREATER DEFERENCE TO THE KING THAN TO THE MINISTER AND SUNG THE PSALM WHICH THE FORMER HAD CALLED FOR', 'THE OLD SERVANT TOLD HIM QUIETLY AS THEY CREPT BACK TO GAMEWELL THAT THIS PASSAGE WAY LED FROM THE HUT IN THE PLEASANCE TO SHERWOOD AND THAT GEOFFREY FOR THE TIME WAS HIDING WITH THE OUTLAWS IN THE FOREST', 'FOR A WHILE SHE LAY IN HER CHAIR IN HAPPY DREAMY PLEASURE AT SUN AND BIRD AND TREE', "BUT THE ESSENCE OF LUTHER'S LECTURES IS THERE"]
```
### Step 4: Use your alignments in training
Please refer to `conformer_mmi/train.py` for usage. Some useful
functions are:
- `load_alignments()`, it loads alignments saved by `conformer_ctc/ali.py`
- `convert_alignments_to_tensor()`, it converts alignments to PyTorch tensors
- `lookup_alignments()`, it returns the alignments of utterances given their cut IDs.

View File

@ -45,12 +45,12 @@ ICEFALL_DIR = Path(__file__).resolve().parent.parent
egs_dir = ICEFALL_DIR / "egs/librispeech/ASR" egs_dir = ICEFALL_DIR / "egs/librispeech/ASR"
lang_dir = egs_dir / "data/lang_bpe_500" lang_dir = egs_dir / "data/lang_bpe_500"
# cut_json = egs_dir / "data/fbank/cuts_train-clean-100.json.gz" # cut_json = egs_dir / "data/fbank/cuts_train-clean-100.json.gz"
cut_json = egs_dir / "data/fbank/cuts_train-clean-360.json.gz" # cut_json = egs_dir / "data/fbank/cuts_train-clean-360.json.gz"
# cut_json = egs_dir / "data/fbank/cuts_train-other-500.json.gz" # cut_json = egs_dir / "data/fbank/cuts_train-other-500.json.gz"
ali_filename = ICEFALL_DIR / "egs/librispeech/ASR/data/ali_500/train-960.pt" # ali_filename = ICEFALL_DIR / "egs/librispeech/ASR/data/ali_500/train-960.pt"
# cut_json = egs_dir / "data/fbank/cuts_test-clean.json.gz" cut_json = egs_dir / "data/fbank/cuts_test-clean.json.gz"
# ali_filename = ICEFALL_DIR / "egs/librispeech/ASR/data/ali_500/test_clean.pt" ali_filename = ICEFALL_DIR / "egs/librispeech/ASR/data/ali_500/test_clean.pt"
def data_exists(): def data_exists():
@ -62,7 +62,7 @@ def get_dataloader():
cuts_train = cuts_train.with_features_path_prefix(egs_dir) cuts_train = cuts_train.with_features_path_prefix(egs_dir)
train_sampler = SingleCutSampler( train_sampler = SingleCutSampler(
cuts_train, cuts_train,
max_duration=200, max_duration=40,
shuffle=False, shuffle=False,
) )
@ -162,7 +162,7 @@ def test():
lattice = get_lattice( lattice = get_lattice(
nnet_output=nnet_output, nnet_output=nnet_output,
HLG=HLG, decoding_graph=HLG,
supervision_segments=supervision_segments, supervision_segments=supervision_segments,
search_beam=20, search_beam=20,
output_beam=8, output_beam=8,