Fix CI (#726)

* Fix CI
* Disable shuffle for yesno. See https://github.com/k2-fsa/icefall/issues/197
2025-12-11 06:55:27 +00:00 · 2022-12-02 10:53:06 +08:00 · 2022-12-02 10:53:06 +08:00 · 6533f359c9
commit 6533f359c9
parent 04c9fc9c9f
29 changed files with 128 additions and 60 deletions
--- a/.github/workflows/build-doc.yml
+++ b/.github/workflows/build-doc.yml
@@ -26,6 +26,10 @@ on:
  pull_request:
    types: [labeled]

+concurrency:
+  group: build_doc-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  build-doc:
    if: github.event.label.name == 'doc' || github.event_name == 'push'
--- a/.github/workflows/run-aishell-2022-06-20.yml
+++ b/.github/workflows/run-aishell-2022-06-20.yml
@@ -34,6 +34,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_aishell_2022_06_20-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_aishell_2022_06_20:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-gigaspeech-2022-05-13.yml
+++ b/.github/workflows/run-gigaspeech-2022-05-13.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_gigaspeech_2022_05_13-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_gigaspeech_2022_05_13:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-2022-03-12.yml
+++ b/.github/workflows/run-librispeech-2022-03-12.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_2022_03_12-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_2022_03_12:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-2022-04-29.yml
+++ b/.github/workflows/run-librispeech-2022-04-29.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_2022_04_29-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_2022_04_29:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-2022-05-13.yml
+++ b/.github/workflows/run-librispeech-2022-05-13.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_2022_05_13-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_2022_05_13:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-2022-11-11-stateless7.yml
+++ b/.github/workflows/run-librispeech-2022-11-11-stateless7.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_2022_11_11_zipformer-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_2022_11_11_zipformer:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-2022-11-14-stateless8.yml
+++ b/.github/workflows/run-librispeech-2022-11-14-stateless8.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_2022_11_14_zipformer_stateless8-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_2022_11_14_zipformer_stateless8:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml
+++ b/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_2022_11_28_conformer_ctc3-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_2022_11_28_conformer_ctc3:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
+++ b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
@@ -16,6 +16,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_lstm_transducer_stateless2_2022_09_03:
    if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'ncnn' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml
+++ b/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_pruned_transducer_stateless3_2022_05_13-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_pruned_transducer_stateless3_2022_05_13:
    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml
+++ b/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_streaming_2022_06_26-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_streaming_2022_06_26:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
+++ b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
@@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_librispeech_2022_04_19-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_librispeech_2022_04_19:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-pretrained-conformer-ctc.yml
+++ b/.github/workflows/run-pretrained-conformer-ctc.yml
@@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]

+concurrency:
+  group: run_pre_trained_conformer_ctc-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_pre_trained_conformer_ctc:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
@@ -32,6 +32,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
@@ -32,6 +32,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
@@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]

+concurrency:
+  group: run_pre_trained_transducer_stateless_modified_2_aishell-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_pre_trained_transducer_stateless_modified_2_aishell:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
@@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]

+concurrency:
+  group: run_pre_trained_transducer_stateless_modified_aishell-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_pre_trained_transducer_stateless_modified_aishell:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
--- a/.github/workflows/run-pretrained-transducer-stateless.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless.yml
@@ -32,6 +32,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_pre_trained_transducer_stateless-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_pre_trained_transducer_stateless:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-pretrained-transducer.yml
+++ b/.github/workflows/run-pretrained-transducer.yml
@@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]

+concurrency:
+  group: run_pre_trained_transducer-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_pre_trained_transducer:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
--- a/.github/workflows/run-ptb-rnn-lm.yml
+++ b/.github/workflows/run-ptb-rnn-lm.yml
@@ -16,6 +16,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"

+concurrency:
+  group: run_ptb_rnn_lm_training-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  run_ptb_rnn_lm_training:
    if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
--- a/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml
+++ b/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml
@@ -23,8 +23,12 @@ on:
  pull_request:
    types: [labeled]

+concurrency:
+  group: run_wenetspeech_pruned_transducer_stateless2-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
-  run_librispeech_pruned_transducer_stateless3_2022_05_13:
+  run_wenetspeech_pruned_transducer_stateless2:
    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'wenetspeech'
    runs-on: ${{ matrix.os }}
    strategy:
--- a/.github/workflows/run-yesno-recipe.yml
+++ b/.github/workflows/run-yesno-recipe.yml
@@ -21,11 +21,15 @@ on:
    branches:
      - master
  pull_request:
-    types: [labeled]
+    branches:
+      - master
+
+concurrency:
+  group: run-yesno-recipe-${{ github.ref }}
+  cancel-in-progress: true

 jobs:
  run-yesno-recipe:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@@ -61,7 +65,7 @@ jobs:

      - name: Install Python dependencies
        run: |
-          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
+          grep -v '^#' ./requirements-ci.txt  | grep -v kaldifst | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf

--- a/.github/workflows/style_check.yml
+++ b/.github/workflows/style_check.yml
@@ -24,6 +24,10 @@ on:
    branches:
      - master

+concurrency:
+  group: style_check-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
  style_check:
    runs-on: ${{ matrix.os }}
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,26 +21,23 @@ on:
    branches:
      - master
  pull_request:
-    types: [labeled]
+    branches:
+      - master
+
+concurrency:
+  group: test-${{ github.ref }}
+  cancel-in-progress: true

 jobs:
  test:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        # os: [ubuntu-18.04, macos-10.15]
-        # disable macOS test for now.
-        os: [ubuntu-18.04]
-        python-version: [3.7, 3.8]
-        torch: ["1.8.0", "1.11.0"]
-        torchaudio: ["0.8.0", "0.11.0"]
-        k2-version: ["1.15.1.dev20220427"]
-        exclude:
-          - torch: "1.8.0"
-            torchaudio: "0.11.0"
-          - torch: "1.11.0"
-            torchaudio: "0.8.0"
+        os: [ubuntu-latest]
+        python-version: ["3.8"]
+        torch: ["1.10.0"]
+        torchaudio: ["0.10.0"]
+        k2-version: ["1.23.2.dev20221201"]

      fail-fast: false

@@ -67,11 +64,7 @@ jobs:
          # numpy 1.20.x does not support python 3.6
          pip install numpy==1.19
          pip install torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
-          if [[ ${{ matrix.torchaudio }} == "0.11.0" ]]; then
          pip install torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
-          else
-            pip install torchaudio==${{ matrix.torchaudio }}
-          fi

          pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/
          pip install git+https://github.com/lhotse-speech/lhotse
@@ -81,7 +74,6 @@ jobs:

          pip install kaldifst
          pip install onnxruntime
-
          pip install -r requirements.txt

      - name: Install graphviz
@@ -124,7 +116,6 @@ jobs:
          cd ../transducer_stateless
          pytest -v -s

-          if [[ ${{ matrix.torchaudio }} == "0.10.0" ]]; then
          cd ../transducer
          pytest -v -s

@@ -133,7 +124,6 @@ jobs:

          cd ../transducer_lstm
          pytest -v -s
-          fi

      - name: Run tests
        if: startsWith(matrix.os, 'macos')
@@ -164,7 +154,6 @@ jobs:
          cd ../transducer_stateless
          pytest -v -s

-          if [[ ${{ matrix.torchaudio }} == "0.10.0" ]]; then
          cd ../transducer
          pytest -v -s

@@ -173,4 +162,3 @@ jobs:

          cd ../transducer_lstm
          pytest -v -s
-          fi
--- a/egs/librispeech/ASR/local/train_bpe_model.py
+++ b/egs/librispeech/ASR/local/train_bpe_model.py
@@ -93,7 +93,6 @@ def main():
        print(f"{model_file} exists - skipping")
        return

-
    shutil.copyfile(model_file, f"{lang_dir}/bpe.model")


--- a/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless2/beam_search.py
@@ -2230,9 +2230,7 @@ def modified_beam_search_rnnlm_LODR(
        log_probs_shape = k2.ragged.create_ragged_shape2(
            row_splits=row_splits, cached_tot_size=log_probs.numel()
        )
-        ragged_log_probs = k2.RaggedTensor(
-            shape=log_probs_shape, value=log_probs
-        )
+        ragged_log_probs = k2.RaggedTensor(shape=log_probs_shape, value=log_probs)
        """
        for all hyps with a non-blank new token, score this token.
        It is a little confusing here because this for-loop
@@ -2267,10 +2265,7 @@ def modified_beam_search_rnnlm_LODR(
        # forward RNNLM to get new states and scores
        if len(token_list) != 0:
            tokens_to_score = (
-                torch.tensor(token_list)
-                .to(torch.int64)
-                .to(device)
-                .reshape(-1, 1)
+                torch.tensor(token_list).to(torch.int64).to(device).reshape(-1, 1)
            )

            hs = torch.cat(hs, dim=1).to(device)
@@ -2304,9 +2299,7 @@ def modified_beam_search_rnnlm_LODR(
                    state_cost = hyp.state_cost.forward_one_step(new_token)

                    # calculate the score of the latest token
-                    current_ngram_score = (
-                        state_cost.lm_score - hyp.state_cost.lm_score
-                    )
+                    current_ngram_score = state_cost.lm_score - hyp.state_cost.lm_score

                    assert current_ngram_score <= 0.0, (
                        state_cost.lm_score,
--- a/egs/librispeech/ASR/pruned_transducer_stateless3/test_scaling.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/test_scaling.py
@@ -52,17 +52,9 @@ def test_scaled_conv2d():
        torch.jit.script(conv2d)


-def test_activation_balancer():
-    act = ActivationBalancer(
-        channel_dim=1, max_abs=10.0, min_positive=0.05, max_positive=1.0
-    )
-    torch.jit.script(act)
-
-
 def main():
    test_scaled_conv1d()
    test_scaled_conv2d()
-    test_activation_balancer()


 if __name__ == "__main__":
--- a/egs/yesno/ASR/tdnn/asr_datamodule.py
+++ b/egs/yesno/ASR/tdnn/asr_datamodule.py
@@ -121,7 +121,7 @@ class YesNoAsrDataModule(DataModule):
        group.add_argument(
            "--shuffle",
            type=str2bool,
-            default=True,
+            default=False,
            help="When enabled (=default), the examples will be "
            "shuffled for each epoch.",
        )