mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
parent
f23c8ce9dd
commit
6c7863c2f8
8
.github/scripts/docker/Dockerfile
vendored
8
.github/scripts/docker/Dockerfile
vendored
@ -31,12 +31,15 @@ LABEL github_repo="https://github.com/k2-fsa/icefall"
|
|||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
RUN pip install --no-cache-dir \
|
RUN pip install --no-cache-dir \
|
||||||
torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \
|
torch==${TORCH_VERSION}+cpu -f https://download.pytorch.org/whl/torch \
|
||||||
|
torchaudio==${TORCHAUDIO_VERSION}+cpu -f https://download.pytorch.org/whl/torchaudio \
|
||||||
k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
|
k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
|
||||||
\
|
\
|
||||||
git+https://github.com/lhotse-speech/lhotse \
|
git+https://github.com/lhotse-speech/lhotse \
|
||||||
kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
|
kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
|
||||||
|
conformer==0.3.2 \
|
||||||
cython \
|
cython \
|
||||||
|
diffusers \
|
||||||
dill \
|
dill \
|
||||||
espnet_tts_frontend \
|
espnet_tts_frontend \
|
||||||
graphviz \
|
graphviz \
|
||||||
@ -45,10 +48,11 @@ RUN pip install --no-cache-dir \
|
|||||||
kaldialign \
|
kaldialign \
|
||||||
kaldifst \
|
kaldifst \
|
||||||
kaldilm \
|
kaldilm \
|
||||||
|
librosa \
|
||||||
matplotlib \
|
matplotlib \
|
||||||
multi_quantization \
|
multi_quantization \
|
||||||
numba \
|
numba \
|
||||||
numpy \
|
"numpy<2.0" \
|
||||||
onnxoptimizer \
|
onnxoptimizer \
|
||||||
onnxsim \
|
onnxsim \
|
||||||
onnx \
|
onnx \
|
||||||
|
32
.github/scripts/docker/generate_build_matrix.py
vendored
32
.github/scripts/docker/generate_build_matrix.py
vendored
@ -43,9 +43,11 @@ def get_torchaudio_version(torch_version):
|
|||||||
|
|
||||||
|
|
||||||
def get_matrix():
|
def get_matrix():
|
||||||
k2_version = "1.24.4.dev20240223"
|
k2_version = "1.24.4.dev20241029"
|
||||||
kaldifeat_version = "1.25.4.dev20240223"
|
kaldifeat_version = "1.25.5.dev20241029"
|
||||||
version = "20240905"
|
version = "20241029"
|
||||||
|
|
||||||
|
# torchaudio 2.5.0 does not support python 3.13
|
||||||
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||||
torch_version = []
|
torch_version = []
|
||||||
# torch_version += ["1.13.0", "1.13.1"]
|
# torch_version += ["1.13.0", "1.13.1"]
|
||||||
@ -56,6 +58,7 @@ def get_matrix():
|
|||||||
torch_version += ["2.3.0", "2.3.1"]
|
torch_version += ["2.3.0", "2.3.1"]
|
||||||
torch_version += ["2.4.0"]
|
torch_version += ["2.4.0"]
|
||||||
torch_version += ["2.4.1"]
|
torch_version += ["2.4.1"]
|
||||||
|
torch_version += ["2.5.0"]
|
||||||
|
|
||||||
matrix = []
|
matrix = []
|
||||||
for p in python_version:
|
for p in python_version:
|
||||||
@ -69,25 +72,16 @@ def get_matrix():
|
|||||||
if version_gt(p, "3.11") and not version_gt(t, "2.1"):
|
if version_gt(p, "3.11") and not version_gt(t, "2.1"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if version_gt(p, "3.12") and not version_gt(t, "2.4"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if version_gt(t, "2.4") and version_gt("3.10", p):
|
||||||
|
# torch>=2.5 requires python 3.10
|
||||||
|
continue
|
||||||
|
|
||||||
k2_version_2 = k2_version
|
k2_version_2 = k2_version
|
||||||
kaldifeat_version_2 = kaldifeat_version
|
kaldifeat_version_2 = kaldifeat_version
|
||||||
|
|
||||||
if t == "2.2.2":
|
|
||||||
k2_version_2 = "1.24.4.dev20240328"
|
|
||||||
kaldifeat_version_2 = "1.25.4.dev20240329"
|
|
||||||
elif t == "2.3.0":
|
|
||||||
k2_version_2 = "1.24.4.dev20240425"
|
|
||||||
kaldifeat_version_2 = "1.25.4.dev20240425"
|
|
||||||
elif t == "2.3.1":
|
|
||||||
k2_version_2 = "1.24.4.dev20240606"
|
|
||||||
kaldifeat_version_2 = "1.25.4.dev20240606"
|
|
||||||
elif t == "2.4.0":
|
|
||||||
k2_version_2 = "1.24.4.dev20240725"
|
|
||||||
kaldifeat_version_2 = "1.25.4.dev20240725"
|
|
||||||
elif t == "2.4.1":
|
|
||||||
k2_version_2 = "1.24.4.dev20240905"
|
|
||||||
kaldifeat_version_2 = "1.25.4.dev20240905"
|
|
||||||
|
|
||||||
matrix.append(
|
matrix.append(
|
||||||
{
|
{
|
||||||
"k2-version": k2_version_2,
|
"k2-version": k2_version_2,
|
||||||
|
@ -129,20 +129,34 @@ done
|
|||||||
|
|
||||||
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
mkdir -p zipformer/exp
|
mkdir -p zipformer/exp
|
||||||
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
|
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
|
||||||
|
mkdir -p data
|
||||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
ls -lh data
|
ls -lh data
|
||||||
ls -lh zipformer/exp
|
ls -lh zipformer/exp
|
||||||
|
|
||||||
|
mkdir -p data/fbank
|
||||||
|
pushd data/fbank
|
||||||
|
|
||||||
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
|
||||||
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
|
||||||
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
|
||||||
|
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
|
||||||
|
|
||||||
|
ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
|
||||||
|
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
|
||||||
|
|
||||||
|
popd
|
||||||
|
|
||||||
log "Decoding test-clean and test-other"
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
# use a small value for decoding with CPU
|
# use a small value for decoding with CPU
|
||||||
max_duration=100
|
max_duration=100
|
||||||
|
|
||||||
for method in greedy_search fast_beam_search modified_beam_search; do
|
for method in greedy_search; do
|
||||||
log "Decoding with $method"
|
log "Decoding with $method"
|
||||||
|
|
||||||
./zipformer/decode.py \
|
./zipformer/decode.py \
|
||||||
|
@ -90,10 +90,6 @@ jobs:
|
|||||||
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
mkdir -p egs/gigaspeech/ASR/data
|
|
||||||
ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank
|
|
||||||
ls -lh egs/gigaspeech/ASR/data/*
|
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
@ -112,7 +108,7 @@ jobs:
|
|||||||
tag: asr-models
|
tag: asr-models
|
||||||
|
|
||||||
- name: Display decoding results for gigaspeech zipformer
|
- name: Display decoding results for gigaspeech zipformer
|
||||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
cd egs/gigaspeech/ASR/
|
cd egs/gigaspeech/ASR/
|
||||||
@ -124,17 +120,17 @@ jobs:
|
|||||||
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
echo "===fast_beam_search==="
|
# echo "===fast_beam_search==="
|
||||||
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
# find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
# find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
#
|
||||||
echo "===modified beam search==="
|
# echo "===modified beam search==="
|
||||||
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
# find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
# find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
- name: Upload decoding results for gigaspeech zipformer
|
- name: Upload decoding results for gigaspeech zipformer
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
|
||||||
with:
|
with:
|
||||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11
|
||||||
path: egs/gigaspeech/ASR/zipformer/exp/
|
path: egs/gigaspeech/ASR/zipformer/exp/
|
||||||
|
1
.github/workflows/yesno.yml
vendored
1
.github/workflows/yesno.yml
vendored
@ -61,5 +61,6 @@ jobs:
|
|||||||
|
|
||||||
python3 -m torch.utils.collect_env
|
python3 -m torch.utils.collect_env
|
||||||
python3 -m k2.version
|
python3 -m k2.version
|
||||||
|
pip list
|
||||||
|
|
||||||
.github/scripts/yesno/ASR/run.sh
|
.github/scripts/yesno/ASR/run.sh
|
||||||
|
Loading…
x
Reference in New Issue
Block a user