From 2e33e243481fbbec49fa7a091891fe06e92ccbd4 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang <csukuangfj@gmail.com> Date: Sat, 24 Jul 2021 17:47:41 +0800 Subject: [PATCH] Add CI test. --- .github/workflows/test.yml | 57 +++++++++++++++++ egs/librispeech/ASR/local/prepare_lang.py | 3 +- .../ASR/local/test_prepare_lang.py | 9 +-- test/test_lexicon.py | 62 +++++++++++++++++++ 4 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 test/test_lexicon.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..f65023d3f --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,57 @@ +# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) + +# See ../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: test + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-18.04, macos-10.15] + python-version: [3.7, 3.9] + torch: ["1.8.1"] + k2-version: ["1.2.dev20210723"] + fail-fast: false + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Python dependencies + run: | + python3 -m pip install --upgrade pip + pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/ + pip install git+https://github.com/lhotse-speech/lhotse + + - name: Run tests + run: | + export PYTHONPATH=$PWD:$PYTHONPATH + pytest ./test diff --git a/egs/librispeech/ASR/local/prepare_lang.py b/egs/librispeech/ASR/local/prepare_lang.py index f515bdb96..f9864bd02 100755 --- a/egs/librispeech/ASR/local/prepare_lang.py +++ b/egs/librispeech/ASR/local/prepare_lang.py @@ -400,12 +400,11 @@ def main(): sil_prob=sil_prob, need_self_loops=True, ) - # Just for debugging, will remove it torch.save(L.as_dict(), out_dir / "L.pt") torch.save(L_disambig.as_dict(), out_dir / "L_disambig.pt") if False: - + # Just for debugging, will remove it L.labels_sym = k2.SymbolTable.from_file(out_dir / "phones.txt") L.aux_labels_sym = k2.SymbolTable.from_file(out_dir / "words.txt") L_disambig.labels_sym = L.labels_sym diff --git a/egs/librispeech/ASR/local/test_prepare_lang.py b/egs/librispeech/ASR/local/test_prepare_lang.py index b677033be..23ab53c7d 100755 --- a/egs/librispeech/ASR/local/test_prepare_lang.py +++ b/egs/librispeech/ASR/local/test_prepare_lang.py @@ -80,13 +80,6 @@ def test_read_lexicon(filename: str): fsa_disambig.draw("L_disambig.pdf", title="L_disambig") -def test_lexicon(): - from icefall.lexicon import Lexicon - - lexicon = Lexicon("data/lang") - print(lexicon.tokens) - - def main(): 
 filename = generate_lexicon_file() test_read_lexicon(filename) @@ -94,4 +87,4 @@ def main(): if __name__ == "__main__": - test_lexicon() + main() diff --git a/test/test_lexicon.py b/test/test_lexicon.py new file mode 100644 index 000000000..b1b823f98 --- /dev/null +++ b/test/test_lexicon.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 + +import k2 +import pytest +import torch + +from icefall.lexicon import Lexicon + + +@pytest.fixture +def lang_dir(tmp_path): + phone2id = """ + <eps> 0 + a 1 + b 2 + f 3 + o 4 + r 5 + z 6 + SPN 7 + #0 8 + """ + word2id = """ + <eps> 0 + foo 1 + bar 2 + baz 3 + <UNK> 4 + #0 5 + """ + + L = k2.Fsa.from_str( + """ + 0 0 7 4 0 + 0 7 -1 -1 0 + 0 1 3 1 0 + 0 3 2 2 0 + 0 5 2 3 0 + 1 2 4 0 0 + 2 0 4 0 0 + 3 4 1 0 0 + 4 0 5 0 0 + 5 6 1 0 0 + 6 0 6 0 0 + 7 + """, + num_aux_labels=1, + ) + + with open(tmp_path / "phones.txt", "w") as f: + f.write(phone2id) + with open(tmp_path / "words.txt", "w") as f: + f.write(word2id) + + torch.save(L.as_dict(), tmp_path / "L.pt") + + return tmp_path + + +def test_lexicon(lang_dir): + lexicon = Lexicon(lang_dir) + assert lexicon.tokens == list(range(1, 8))