#!/usr/bin/env python3

import k2
import pytest
import torch

from icefall.lexicon import Lexicon


@pytest.fixture
def lang_dir(tmp_path):
    """Create a tiny language directory and return its path.

    Writes three files into ``tmp_path``:

    * ``phones.txt`` - phone symbol table, one ``<symbol> <id>`` pair per line
    * ``words.txt``  - word symbol table, one ``<symbol> <id>`` pair per line
    * ``L.pt``       - the lexicon FSA ``L`` saved as a dict via ``torch.save``
    """
    # NOTE(review): the symbols for phone id 0, word id 0 and word id 4 were
    # missing from the table text (an id with no symbol cannot be parsed as a
    # "symbol id" pair). They are restored here as "<eps>" and "<UNK>" on the
    # assumption that angle-bracket tokens were stripped - confirm against
    # the upstream test data.
    phone2id = """
        <eps> 0
        a 1
        b 2
        f 3
        o 4
        r 5
        z 6
        SPN 7
        #0 8
    """
    word2id = """
        <eps> 0
        foo 1
        bar 2
        baz 3
        <UNK> 4
        #0 5
    """

    # One arc per line: "src_state dst_state label aux_label score".
    # Labels are phone ids and aux labels are word ids from the tables above;
    # label -1 marks the arc entering the final state, and the last line
    # holds the final state on its own.
    L = k2.Fsa.from_str(
        """
        0 0 7 4 0
        0 7 -1 -1 0
        0 1 3 1 0
        0 3 2 2 0
        0 5 2 3 0
        1 2 4 0 0
        2 0 4 0 0
        3 4 1 0 0
        4 0 5 0 0
        5 6 1 0 0
        6 0 6 0 0
        7
        """,
        num_aux_labels=1,
    )

    with open(tmp_path / "phones.txt", "w", encoding="utf-8") as f:
        f.write(phone2id)
    with open(tmp_path / "words.txt", "w", encoding="utf-8") as f:
        f.write(word2id)

    torch.save(L.as_dict(), tmp_path / "L.pt")

    return tmp_path


def test_lexicon(lang_dir):
    """Check that ``Lexicon.tokens`` is exactly the ids 1 through 7."""
    # NOTE(review): the fixture writes "phones.txt" while this assertion reads
    # ``lexicon.tokens``; confirm the Lexicon under test loads that filename.
    lexicon = Lexicon(lang_dir)
    # Of the nine phone ids (0-8), the expected list omits 0 and 8.
    assert lexicon.tokens == list(range(1, 8))