mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
Fix CI test for gigaspeech (#1787)
This commit is contained in:
parent
516b4869b3
commit
f23c8ce9dd
@ -19,7 +19,7 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
mkdir -p pruned_transducer_stateless2/exp
|
mkdir -p pruned_transducer_stateless2/exp
|
||||||
ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
|
ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
|
||||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
@ -29,8 +29,16 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
|
|||||||
ls -lh data/fbank
|
ls -lh data/fbank
|
||||||
ls -lh pruned_transducer_stateless2/exp
|
ls -lh pruned_transducer_stateless2/exp
|
||||||
|
|
||||||
ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz
|
pushd data/fbank
|
||||||
ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
|
||||||
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
|
||||||
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
|
||||||
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
|
||||||
|
|
||||||
|
ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
|
||||||
|
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
|
||||||
|
popd
|
||||||
|
|
||||||
|
|
||||||
log "Decoding dev and test"
|
log "Decoding dev and test"
|
||||||
|
|
||||||
|
@ -162,7 +162,7 @@ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
|
|||||||
--ngram-lm-scale -0.16
|
--ngram-lm-scale -0.16
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then
|
||||||
mkdir -p lstm_transducer_stateless2/exp
|
mkdir -p lstm_transducer_stateless2/exp
|
||||||
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
||||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
@ -175,7 +175,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
|
|||||||
# use a small value for decoding with CPU
|
# use a small value for decoding with CPU
|
||||||
max_duration=100
|
max_duration=100
|
||||||
|
|
||||||
for method in greedy_search fast_beam_search modified_beam_search; do
|
for method in greedy_search fast_beam_search; do
|
||||||
log "Decoding with $method"
|
log "Decoding with $method"
|
||||||
|
|
||||||
./lstm_transducer_stateless2/decode.py \
|
./lstm_transducer_stateless2/decode.py \
|
||||||
|
@ -41,7 +41,7 @@ concurrency:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_gigaspeech_2022_05_13:
|
run_gigaspeech_2022_05_13:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
@ -106,7 +106,7 @@ jobs:
|
|||||||
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
|
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
|
||||||
|
|
||||||
- name: Display decoding results for gigaspeech pruned_transducer_stateless2
|
- name: Display decoding results for gigaspeech pruned_transducer_stateless2
|
||||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd egs/gigaspeech/ASR/
|
cd egs/gigaspeech/ASR/
|
||||||
@ -122,7 +122,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
|
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
|
||||||
with:
|
with:
|
||||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
|
||||||
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
|
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
|
||||||
|
@ -24,7 +24,7 @@ concurrency:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_lstm_transducer_stateless2_2022_09_03:
|
run_librispeech_lstm_transducer_stateless2_2022_09_03:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
@ -116,7 +116,7 @@ jobs:
|
|||||||
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
|
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
|
||||||
|
|
||||||
- name: Display decoding results for lstm_transducer_stateless2
|
- name: Display decoding results for lstm_transducer_stateless2
|
||||||
if: github.event_name == 'schedule'
|
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd egs/librispeech/ASR
|
cd egs/librispeech/ASR
|
||||||
@ -130,9 +130,9 @@ jobs:
|
|||||||
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
echo "===modified beam search==="
|
# echo "===modified beam search==="
|
||||||
find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
- name: Display decoding results for lstm_transducer_stateless2
|
- name: Display decoding results for lstm_transducer_stateless2
|
||||||
if: github.event.label.name == 'shallow-fusion'
|
if: github.event.label.name == 'shallow-fusion'
|
||||||
@ -159,7 +159,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload decoding results for lstm_transducer_stateless2
|
- name: Upload decoding results for lstm_transducer_stateless2
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR'
|
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
|
||||||
with:
|
with:
|
||||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
|
||||||
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
|
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
|
||||||
|
@ -260,7 +260,7 @@ def decode_one_batch(
|
|||||||
Return the decoding result. See above description for the format of
|
Return the decoding result. See above description for the format of
|
||||||
the returned dict.
|
the returned dict.
|
||||||
"""
|
"""
|
||||||
device = model.device
|
device = next(model.parameters()).device
|
||||||
feature = batch["inputs"]
|
feature = batch["inputs"]
|
||||||
assert feature.ndim == 3
|
assert feature.ndim == 3
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user