mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
63 lines
1013 B
Python
63 lines
1013 B
Python
#!/usr/bin/env python3
|
|
|
|
import k2
|
|
import pytest
|
|
import torch
|
|
|
|
from icefall.lexicon import Lexicon
|
|
|
|
|
|
@pytest.fixture
def lang_dir(tmp_path):
    """Populate ``tmp_path`` with a tiny language directory and return it.

    Three files are created:

    * ``phones.txt`` -- symbol table mapping phone symbols to integer IDs.
    * ``words.txt``  -- symbol table mapping word symbols to integer IDs.
    * ``L.pt``       -- the lexicon FSA, saved as a dict via ``torch.save``.

    Args:
      tmp_path:
        Per-test temporary directory (a path object supporting ``/``).

    Returns:
      The same ``tmp_path``, now containing the three files above.
    """
    phone_table = """
        <eps> 0
        a 1
        b 2
        f 3
        o 4
        r 5
        z 6
        SPN 7
        #0 8
    """
    word_table = """
        <eps> 0
        foo 1
        bar 2
        baz 3
        <UNK> 4
        #0 5
    """

    # The symbol tables do not depend on the FSA, so write them out first.
    (tmp_path / "phones.txt").write_text(phone_table)
    (tmp_path / "words.txt").write_text(word_table)

    # Lexicon transducer. With num_aux_labels=1 each arc row presumably reads
    # "src dst label aux_label score" (k2 text format -- TODO confirm), i.e.
    # label is a phone ID and aux_label a word ID from the tables above:
    #   SPN -> <UNK>, f o o -> foo, b a r -> bar, b a z -> baz.
    # The trailing "7" line names the final state.
    lexicon_fsa_text = """
        0 0 7 4 0
        0 7 -1 -1 0
        0 1 3 1 0
        0 3 2 2 0
        0 5 2 3 0
        1 2 4 0 0
        2 0 4 0 0
        3 4 1 0 0
        4 0 5 0 0
        5 6 1 0 0
        6 0 6 0 0
        7
    """
    lexicon_fsa = k2.Fsa.from_str(lexicon_fsa_text, num_aux_labels=1)
    torch.save(lexicon_fsa.as_dict(), tmp_path / "L.pt")

    return tmp_path
|
|
|
|
|
|
def test_lexicon(lang_dir):
    """Check the token IDs exposed by a ``Lexicon`` built from ``lang_dir``.

    The expected IDs are 1 through 7.
    """
    # NOTE(review): 1..7 presumably leaves out <eps> (0) and the
    # disambiguation symbol #0 (8) from phones.txt -- confirm against Lexicon.
    expected_tokens = list(range(1, 8))

    lexicon = Lexicon(lang_dir)

    assert lexicon.tokens == expected_tokens
|