icefall/test/test_lexicon.py
2021-07-24 17:47:41 +08:00

63 lines
1013 B
Python

#!/usr/bin/env python3
import k2
import pytest
import torch
from icefall.lexicon import Lexicon
@pytest.fixture
def lang_dir(tmp_path):
    """Populate ``tmp_path`` with a tiny lang directory and return it.

    Three files are written into ``tmp_path``:

    * ``phones.txt`` -- phone symbol table, one ``symbol id`` pair per line.
    * ``words.txt``  -- word symbol table in the same layout.
    * ``L.pt``       -- the lexicon FSA, stored with ``torch.save`` as the
      dict returned by ``Fsa.as_dict()``.

    Returns:
      ``tmp_path`` itself, so a test can hand it straight to ``Lexicon``.
    """
    # Each table starts with a newline, i.e. the written file begins with
    # an empty line.
    phone_table = (
        "\n"
        "<eps> 0\n"
        "a 1\n"
        "b 2\n"
        "f 3\n"
        "o 4\n"
        "r 5\n"
        "z 6\n"
        "SPN 7\n"
        "#0 8\n"
    )
    word_table = (
        "\n"
        "<eps> 0\n"
        "foo 1\n"
        "bar 2\n"
        "baz 3\n"
        "<UNK> 4\n"
        "#0 5\n"
    )

    # NOTE(review): arc lines are read here as k2's text layout
    # "src_state dst_state label aux_label score" -- confirm against the
    # k2 docs.  Read that way, labels are IDs from the phone table and
    # aux labels are IDs from the word table.
    lexicon_text = (
        "\n"
        "0 0 7 4 0\n"  # SPN -> <UNK>
        "0 7 -1 -1 0\n"  # arc into the final state
        "0 1 3 1 0\n"  # f, emits foo
        "0 3 2 2 0\n"  # b, emits bar
        "0 5 2 3 0\n"  # b, emits baz
        "1 2 4 0 0\n"  # o
        "2 0 4 0 0\n"  # o, back to the start state
        "3 4 1 0 0\n"  # a
        "4 0 5 0 0\n"  # r, back to the start state
        "5 6 1 0 0\n"  # a
        "6 0 6 0 0\n"  # z, back to the start state
        "7\n"  # final state
    )
    lexicon_fsa = k2.Fsa.from_str(lexicon_text, num_aux_labels=1)

    (tmp_path / "phones.txt").write_text(phone_table)
    (tmp_path / "words.txt").write_text(word_table)
    torch.save(lexicon_fsa.as_dict(), tmp_path / "L.pt")

    return tmp_path
def test_lexicon(lang_dir):
    """Check ``Lexicon.tokens`` for the directory built by ``lang_dir``."""
    # The expected IDs are 1 through 7 inclusive.
    expected_tokens = [1, 2, 3, 4, 5, 6, 7]
    assert Lexicon(lang_dir).tokens == expected_tokens