mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
Fix CI test for gigaspeech (#1787)
This commit is contained in:
parent
516b4869b3
commit
f23c8ce9dd
@ -19,7 +19,7 @@ repo=$(basename $repo_url)
|
||||
|
||||
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||
mkdir -p pruned_transducer_stateless2/exp
|
||||
ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
|
||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||
@ -29,8 +29,16 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
|
||||
ls -lh data/fbank
|
||||
ls -lh pruned_transducer_stateless2/exp
|
||||
|
||||
ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz
|
||||
ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz
|
||||
pushd data/fbank
|
||||
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
|
||||
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
|
||||
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
|
||||
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
|
||||
|
||||
ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
|
||||
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
|
||||
popd
|
||||
|
||||
|
||||
log "Decoding dev and test"
|
||||
|
||||
|
@ -162,7 +162,7 @@ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
|
||||
--ngram-lm-scale -0.16
|
||||
fi
|
||||
|
||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
|
||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then
|
||||
mkdir -p lstm_transducer_stateless2/exp
|
||||
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||
@ -175,7 +175,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
|
||||
# use a small value for decoding with CPU
|
||||
max_duration=100
|
||||
|
||||
for method in greedy_search fast_beam_search modified_beam_search; do
|
||||
for method in greedy_search fast_beam_search; do
|
||||
log "Decoding with $method"
|
||||
|
||||
./lstm_transducer_stateless2/decode.py \
|
||||
|
@ -41,7 +41,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
run_gigaspeech_2022_05_13:
|
||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||
if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
@ -106,7 +106,7 @@ jobs:
|
||||
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
|
||||
|
||||
- name: Display decoding results for gigaspeech pruned_transducer_stateless2
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
|
||||
shell: bash
|
||||
run: |
|
||||
cd egs/gigaspeech/ASR/
|
||||
@ -122,7 +122,7 @@ jobs:
|
||||
|
||||
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
|
||||
uses: actions/upload-artifact@v4
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
|
||||
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
|
||||
|
@ -24,7 +24,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
run_librispeech_lstm_transducer_stateless2_2022_09_03:
|
||||
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
@ -116,7 +116,7 @@ jobs:
|
||||
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
|
||||
|
||||
- name: Display decoding results for lstm_transducer_stateless2
|
||||
if: github.event_name == 'schedule'
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
shell: bash
|
||||
run: |
|
||||
cd egs/librispeech/ASR
|
||||
@ -130,9 +130,9 @@ jobs:
|
||||
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||
|
||||
echo "===modified beam search==="
|
||||
find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||
find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||
# echo "===modified beam search==="
|
||||
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||
|
||||
- name: Display decoding results for lstm_transducer_stateless2
|
||||
if: github.event.label.name == 'shallow-fusion'
|
||||
@ -159,7 +159,7 @@ jobs:
|
||||
|
||||
- name: Upload decoding results for lstm_transducer_stateless2
|
||||
uses: actions/upload-artifact@v4
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR'
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
|
||||
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
|
||||
|
@ -260,7 +260,7 @@ def decode_one_batch(
|
||||
Return the decoding result. See above description for the format of
|
||||
the returned dict.
|
||||
"""
|
||||
device = model.device
|
||||
device = next(model.parameters()).device
|
||||
feature = batch["inputs"]
|
||||
assert feature.ndim == 3
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user