Fix CI test for gigaspeech (#1787)

This commit is contained in:
Fangjun Kuang 2024-10-29 15:50:49 +08:00 committed by GitHub
parent 516b4869b3
commit f23c8ce9dd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 23 additions and 15 deletions

View File

@ -19,7 +19,7 @@ repo=$(basename $repo_url)
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p pruned_transducer_stateless2/exp mkdir -p pruned_transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/ ln -s $PWD/$repo/data/lang_bpe_500 data/
@ -29,8 +29,16 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
ls -lh data/fbank ls -lh data/fbank
ls -lh pruned_transducer_stateless2/exp ls -lh pruned_transducer_stateless2/exp
ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz pushd data/fbank
ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
popd
log "Decoding dev and test" log "Decoding dev and test"

View File

@ -162,7 +162,7 @@ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
--ngram-lm-scale -0.16 --ngram-lm-scale -0.16
fi fi
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then
mkdir -p lstm_transducer_stateless2/exp mkdir -p lstm_transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/ ln -s $PWD/$repo/data/lang_bpe_500 data/
@ -175,7 +175,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
# use a small value for decoding with CPU # use a small value for decoding with CPU
max_duration=100 max_duration=100
for method in greedy_search fast_beam_search modified_beam_search; do for method in greedy_search fast_beam_search; do
log "Decoding with $method" log "Decoding with $method"
./lstm_transducer_stateless2/decode.py \ ./lstm_transducer_stateless2/decode.py \

View File

@ -41,7 +41,7 @@ concurrency:
jobs: jobs:
run_gigaspeech_2022_05_13: run_gigaspeech_2022_05_13:
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
@ -106,7 +106,7 @@ jobs:
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh .github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
- name: Display decoding results for gigaspeech pruned_transducer_stateless2 - name: Display decoding results for gigaspeech pruned_transducer_stateless2
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
shell: bash shell: bash
run: | run: |
cd egs/gigaspeech/ASR/ cd egs/gigaspeech/ASR/
@ -122,7 +122,7 @@ jobs:
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2 - name: Upload decoding results for gigaspeech pruned_transducer_stateless2
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
with: with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12 name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/ path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/

View File

@ -24,7 +24,7 @@ concurrency:
jobs: jobs:
run_librispeech_lstm_transducer_stateless2_2022_09_03: run_librispeech_lstm_transducer_stateless2_2022_09_03:
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
@ -116,7 +116,7 @@ jobs:
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
- name: Display decoding results for lstm_transducer_stateless2 - name: Display decoding results for lstm_transducer_stateless2
if: github.event_name == 'schedule' if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
shell: bash shell: bash
run: | run: |
cd egs/librispeech/ASR cd egs/librispeech/ASR
@ -130,9 +130,9 @@ jobs:
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===modified beam search===" # echo "===modified beam search==="
find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Display decoding results for lstm_transducer_stateless2 - name: Display decoding results for lstm_transducer_stateless2
if: github.event.label.name == 'shallow-fusion' if: github.event.label.name == 'shallow-fusion'
@ -159,7 +159,7 @@ jobs:
- name: Upload decoding results for lstm_transducer_stateless2 - name: Upload decoding results for lstm_transducer_stateless2
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
with: with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03 name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/ path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/

View File

@ -260,7 +260,7 @@ def decode_one_batch(
Return the decoding result. See above description for the format of Return the decoding result. See above description for the format of
the returned dict. the returned dict.
""" """
device = model.device device = next(model.parameters()).device
feature = batch["inputs"] feature = batch["inputs"]
assert feature.ndim == 3 assert feature.ndim == 3