mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
* Use modified transducer loss in training. * Minor fix. * Add modified beam search. * Add modified beam search. * Minor fixes. * Fix typo. * Update RESULTS. * Fix a typo. * Minor fixes.
152 lines
7.0 KiB
YAML
152 lines
7.0 KiB
YAML
# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
|
|
|
|
# See ../../LICENSE for clarification regarding multiple authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Decodes a few sample wave files with the pre-trained stateless transducer
# model using greedy search, beam search and modified beam search.
name: run-pre-trained-transducer-stateless

on:
  # Every push to master runs the workflow.
  push:
    branches:
      - master
  # Pull requests trigger it only when a label is added; the job-level `if`
  # then checks which label it was.
  pull_request:
    types: [labeled]
|
|
|
|
jobs:
  run_pre_trained_transducer_stateless:
    # Run on every push, or on a pull request event whose label is `ready`.
    if: github.event.label.name == 'ready' || github.event_name == 'push'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # NOTE(review): GitHub appears to have retired the hosted ubuntu-18.04
        # image; confirm this runner label is still available.
        os: [ubuntu-18.04]
        # Quoted so the versions stay strings; an unquoted 3.10 would be
        # parsed as the float 3.1.
        python-version: ["3.7", "3.8", "3.9"]
        torch: ["1.10.0"]
        torchaudio: ["0.10.0"]
        k2-version: ["1.9.dev20211101"]

      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false

    steps:
|
|
# Check out the repository with its full history (fetch-depth 0 disables the
# default shallow clone).
- uses: actions/checkout@v2
  with:
    fetch-depth: 0
|
|
|
|
# Install the Python interpreter selected by the current matrix entry.
- name: Setup Python ${{ matrix.python-version }}
  uses: actions/setup-python@v1
  with:
    python-version: ${{ matrix.python-version }}
|
|
|
|
# Install the CPU builds of torch, torchaudio and k2 pinned by the matrix,
# then lhotse (from git), kaldifeat and the repository's requirements.txt.
- name: Install Python dependencies
  run: |
    python3 -m pip install --upgrade pip pytest
    # numpy 1.20.x does not support python 3.6
    pip install numpy==1.19
    pip install torch==${{ matrix.torch }}+cpu torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
    pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/

    python3 -m pip install git+https://github.com/lhotse-speech/lhotse
    python3 -m pip install kaldifeat
    # We are in ./icefall and there is a file: requirements.txt in it
    pip install -r requirements.txt
|
|
|
|
# Install both the graphviz Python package and the system graphviz package.
- name: Install graphviz
  shell: bash
  run: |
    python3 -m pip install -qq graphviz
    sudo apt-get -qq install graphviz
|
|
|
|
# Clone the pre-trained model repository from Hugging Face (with git-lfs)
# into egs/librispeech/ASR/tmp, then list its contents and the bundled test
# wave files.
- name: Download pre-trained model
  shell: bash
  run: |
    sudo apt-get -qq install git-lfs tree sox
    cd egs/librispeech/ASR
    mkdir tmp
    cd tmp
    git lfs install
    git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07
    cd ..
    tree tmp
    soxi tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/*.wav
    ls -lh tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/*.wav
|
|
|
|
# Decode the three sample wave files with greedy search, allowing at most one
# symbol per frame.
- name: Run greedy search decoding (max-sym-per-frame 1)
  shell: bash
  run: |
    # Prepend the repository root while keeping any existing PYTHONPATH entries.
    export PYTHONPATH=$PWD:$PYTHONPATH
    cd egs/librispeech/ASR
    ./transducer_stateless/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame 1 \
      --checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
      --bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
|
|
|
# Decode the three sample wave files with greedy search, allowing at most two
# symbols per frame.
- name: Run greedy search decoding (max-sym-per-frame 2)
  shell: bash
  run: |
    # Prepend the repository root while keeping any existing PYTHONPATH entries.
    export PYTHONPATH=$PWD:$PYTHONPATH
    cd egs/librispeech/ASR
    ./transducer_stateless/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame 2 \
      --checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
      --bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
|
|
|
# Decode the three sample wave files with greedy search, allowing at most
# three symbols per frame.
- name: Run greedy search decoding (max-sym-per-frame 3)
  shell: bash
  run: |
    # Prepend the repository root while keeping any existing PYTHONPATH entries.
    export PYTHONPATH=$PWD:$PYTHONPATH
    cd egs/librispeech/ASR
    ./transducer_stateless/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame 3 \
      --checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
      --bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
|
|
|
# Decode the three sample wave files with beam search (beam size 4).
- name: Run beam search decoding
  shell: bash
  run: |
    export PYTHONPATH=$PWD:$PYTHONPATH
    cd egs/librispeech/ASR
    ./transducer_stateless/pretrained.py \
      --method beam_search \
      --beam-size 4 \
      --checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
      --bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
|
|
|
# Decode the three sample wave files with modified beam search (beam size 4).
- name: Run modified beam search decoding
  shell: bash
  run: |
    export PYTHONPATH=$PWD:$PYTHONPATH
    cd egs/librispeech/ASR
    ./transducer_stateless/pretrained.py \
      --method modified_beam_search \
      --beam-size 4 \
      --checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
      --bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
      ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|