From 31aaee60e89eaf7e3786ea4ffc86f58e470c3881 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 29 Oct 2024 16:08:06 +0800 Subject: [PATCH] Fix more Gigaspeech CI errors --- .../run-gigaspeech-zipformer-2023-10-17.sh | 18 +++++++++++++-- .../run-gigaspeech-zipformer-2023-10-17.yml | 22 ++++++++----------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh b/.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh index 329896ef6..438edd3b1 100755 --- a/.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh +++ b/.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh @@ -129,20 +129,34 @@ done echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then +if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then mkdir -p zipformer/exp ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt + mkdir -p data ln -s $PWD/$repo/data/lang_bpe_500 data/ ls -lh data ls -lh zipformer/exp + mkdir -p data/fbank + pushd data/fbank + + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca + curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca + + ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz + ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz + + popd + log "Decoding test-clean and test-other" # use a small value for decoding with CPU max_duration=100 - for method in greedy_search fast_beam_search modified_beam_search; do + for method in greedy_search; do log "Decoding with $method" ./zipformer/decode.py \ diff --git a/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml b/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml index 4ecc2aea0..48322e75c 100644 --- a/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml +++ b/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml @@ -90,10 +90,6 @@ jobs: GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | - mkdir -p egs/gigaspeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank - ls -lh egs/gigaspeech/ASR/data/* - sudo apt-get -qq install git-lfs tree export PYTHONPATH=$PWD:$PYTHONPATH export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH @@ -112,7 +108,7 @@ jobs: tag: asr-models - name: Display decoding results for gigaspeech zipformer - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' + if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch' shell: bash run: | cd egs/gigaspeech/ASR/ @@ -124,17 +120,17 @@ jobs: find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 + # echo "===fast_beam_search===" + # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 + # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 + # + # echo "===modified beam search===" + # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 + # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - name: Upload decoding results for gigaspeech zipformer uses: actions/upload-artifact@v4 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' + if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch' with: name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 path: egs/gigaspeech/ASR/zipformer/exp/