Compare commits
124 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
9165e73a3b | ||
|
8cda08e0c5 | ||
|
6edd6a8370 | ||
|
0ecdee6e88 | ||
|
3f79fbbd6d | ||
|
c1aefb643e | ||
|
e485c5749b | ||
|
e05540cfad | ||
|
5f9602afab | ||
|
a39a011de9 | ||
|
eeefc87660 | ||
|
0355ed8ced | ||
|
f36767ed1d | ||
|
40cc0a4a2c | ||
|
843763fd05 | ||
|
5dee672556 | ||
|
2a8a993f15 | ||
|
2e042b356e | ||
|
b75abef4c0 | ||
|
4fd12b48b8 | ||
|
f834ad861d | ||
|
352aa43ce3 | ||
|
c1adbda9f0 | ||
|
2624da8275 | ||
|
20379449fc | ||
|
01aed93b1b | ||
|
7912c2f442 | ||
|
46ab855aa5 | ||
|
b30f385898 | ||
|
9fae1afa65 | ||
|
5a5895d20b | ||
|
6f5ab0ee14 | ||
|
dde905597c | ||
|
3c78253a56 | ||
|
64b6e7fcc5 | ||
|
114ea348a7 | ||
|
af6e61a5a3 | ||
|
b65c799b4e | ||
|
6c60c618e6 | ||
|
1868136b40 | ||
|
6be131252a | ||
|
17861dce6b | ||
|
9b79da9128 | ||
|
bf2047023f | ||
|
9fdc43df23 | ||
|
749eff994f | ||
|
94a567c638 | ||
|
8ee0c34d3a | ||
|
5c7566f2e2 | ||
|
a1f252c981 | ||
|
c38f0252d5 | ||
|
4b1411b04f | ||
|
428571a274 | ||
|
3692f9a54c | ||
|
06b5574bcb | ||
|
2bd09f4b18 | ||
|
01f30d2e34 | ||
|
e78f081327 | ||
|
e15b2ae6f5 | ||
|
cc1cd67c3e | ||
|
c6e643951f | ||
|
ae17334ce2 | ||
|
48d96b9250 | ||
|
1a43b6117d | ||
|
0c0e24298c | ||
|
46535d5e6d | ||
|
88627631b6 | ||
|
febd091b24 | ||
|
a2d21601bc | ||
|
800a6ff1d9 | ||
|
32f4ba6c8e | ||
|
25b70c9cd5 | ||
|
fa6404ad19 | ||
|
0348fda69f | ||
|
9f8eaa6cd4 | ||
|
d1a473a069 | ||
|
6e41bb2039 | ||
|
ca273828f1 | ||
|
3e2979ec45 | ||
|
41bd60a538 | ||
|
da74f96e4d | ||
|
fc78bc2be7 | ||
|
2f65906554 | ||
|
78454c23f3 | ||
|
13f2917e69 | ||
|
1efbb6dbf3 | ||
|
1313e9dc61 | ||
|
2ea9ff75b4 | ||
|
d4f2a89b50 | ||
|
49039c074a | ||
|
08dd6c52de | ||
|
2f9fc99e6b | ||
|
07d11476c1 | ||
|
00e550c11b | ||
|
959da88b6e | ||
|
47ff9a2c35 | ||
|
28a4ff680c | ||
|
9d4872aa09 | ||
|
b72fc599fd | ||
|
e452fd4079 | ||
|
ea61de7eb4 | ||
|
e793159cc7 | ||
|
8f03b654fc | ||
|
e59d05a45a | ||
|
039e27dd32 | ||
|
34ba30272d | ||
|
3196cff441 | ||
|
2399cc8993 | ||
|
4aab351344 | ||
|
d6274e7d41 | ||
|
d2652a2c49 | ||
|
f0d69c3f9e | ||
|
632098e0c1 | ||
|
b3a90b1dcf | ||
|
72aa5eab2b | ||
|
ec66d87fae | ||
|
90f82455f3 | ||
|
78e79b7b35 | ||
|
3de5b99c30 | ||
|
845baf7e45 | ||
|
3ed1686424 | ||
|
dccc927428 | ||
|
22f2fb7371 | ||
|
b5c19185ea |
2
.flake8
@ -3,8 +3,10 @@ max-line-length = 80
|
||||
|
||||
exclude =
|
||||
.git,
|
||||
doc,
|
||||
build,
|
||||
build_release,
|
||||
cmake/cmake_extension.py,
|
||||
kaldifeat/python/kaldifeat/__init__.py
|
||||
|
||||
ignore =
|
||||
|
81
.github/workflows/build-doc.yml
vendored
Normal file
@ -0,0 +1,81 @@
|
||||
# Copyright 2022 Xiaomi Corp. (author: Fangjun Kuang)
|
||||
|
||||
# See ../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# refer to https://github.com/actions/starter-workflows/pull/47/files
|
||||
|
||||
# You can access it at https://csukuangfj.github.io/kaldifeat
|
||||
name: Generate doc
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- doc
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build-doc:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
python-version: [3.8]
|
||||
steps:
|
||||
# refer to https://github.com/actions/checkout
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Display Python version
|
||||
run: python -c "import sys; print(sys.version)"
|
||||
|
||||
- name: Update wheels
|
||||
shell: bash
|
||||
run: |
|
||||
export KALDIFEAT_DIR=$PWD
|
||||
ls -lh $KALDIFEAT_DIR
|
||||
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
|
||||
cd huggingface
|
||||
|
||||
./run.sh
|
||||
|
||||
- name: Build doc
|
||||
shell: bash
|
||||
run: |
|
||||
cd doc
|
||||
git status
|
||||
python3 -m pip install -r ./requirements.txt
|
||||
make html
|
||||
cp source/cpu.html build/html/
|
||||
cp source/cuda.html build/html/
|
||||
cp source/cpu-cn.html build/html/
|
||||
cp source/cuda-cn.html build/html/
|
||||
touch build/html/.nojekyll
|
||||
|
||||
- name: Deploy
|
||||
uses: peaceiris/actions-gh-pages@v3
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./doc/build/html
|
||||
publish_branch: gh-pages
|
163
.github/workflows/build_conda.yml
vendored
@ -1,163 +0,0 @@
|
||||
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
|
||||
|
||||
# See ../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# refer to https://github.com/actions/starter-workflows/pull/47/files
|
||||
|
||||
name: build_conda_cuda
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- conda-cuda
|
||||
|
||||
env:
|
||||
KALDIFEAT_BUILD_TYPE: Release
|
||||
|
||||
jobs:
|
||||
build_conda_cuda:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-16.04]
|
||||
# Anaconda does not support Python 3.9 as of 2021.05.08
|
||||
python-version: [3.6, 3.7, 3.8, 3.9]
|
||||
# python-version: [3.6, 3.7, 3.8]
|
||||
cuda: ["10.1", "10.2", "11.0", "11.1"]
|
||||
# from https://download.pytorch.org/whl/torch_stable.html
|
||||
#
|
||||
# PyTorch 1.9.0 supports: cuda 10.2 (default), 11.1
|
||||
# PyTorch 1.8.1 supports: cuda 10.1, 10.2 (default), 11.1
|
||||
# PyTorch 1.8.0 supports: cuda 10.1, 10.2 (default), 11.1
|
||||
# PyTorch 1.7.x supports: cuda 10.1, 10.2 (default), 11.0, 9.2 (not included in this setup)
|
||||
# PyTorch 1.6.0 supports: cuda 10.1, 10.2 (default), 9.2 (not included in this setup)
|
||||
# PyTorch 1.5.x supports: cuda 10.1, 10.2 (default), 9.2 (not included in this setup)
|
||||
#
|
||||
# PyTorch 1.8.x and 1.7.1 support Python 3.6, 3.7, 3.8, 3.9
|
||||
# PyTorch 1.7.0, 1.6.0, and 1.5.x support Python 3.6, 3.7, 3.8
|
||||
#
|
||||
# Other PyTorch versions are not tested
|
||||
#
|
||||
# torch: ["1.5.0", "1.5.1", "1.6.0", "1.7.0", "1.7.1", "1.8.0", "1.8.1"]
|
||||
# 1.5.x is removed because there are compilation errors.
|
||||
# See
|
||||
# https://github.com/csukuangfj/k2/runs/2533830771?check_suite_focus=true
|
||||
# and
|
||||
# https://github.com/NVIDIA/apex/issues/805
|
||||
torch: ["1.6.0", "1.7.0", "1.7.1", "1.8.0", "1.8.1", "1.9.0"]
|
||||
exclude:
|
||||
# - cuda: "11.0" # exclude 11.0 for [1.5.0, 1.5.1, 1.6.0, 1.8.0, 1.8.1, 1.9.0]
|
||||
# torch: "1.5.0"
|
||||
# - cuda: "11.0"
|
||||
# torch: "1.5.1"
|
||||
- cuda: "11.0"
|
||||
torch: "1.6.0"
|
||||
- cuda: "11.0"
|
||||
torch: "1.8.0"
|
||||
- cuda: "11.0"
|
||||
torch: "1.8.1"
|
||||
- cuda: "11.0"
|
||||
torch: "1.9.0"
|
||||
# - cuda: "11.1" # exclude 11.1 for [1.5.0, 1.5.1, 1.6.0, 1.7.0, 1.7.1]
|
||||
# torch: "1.5.0"
|
||||
# - cuda: "11.1"
|
||||
# torch: "1.5.1"
|
||||
- cuda: "11.1"
|
||||
torch: "1.6.0"
|
||||
- cuda: "11.1"
|
||||
torch: "1.7.0"
|
||||
- cuda: "11.1"
|
||||
torch: "1.7.1"
|
||||
- cuda: "10.1" # exclude 10.1 for [1.9.0]
|
||||
torch: "1.9.0"
|
||||
- python-version: 3.9 # exclude Python 3.9 for [1.5.0, 1.5.1, 1.6.0, 1.7.0]
|
||||
torch: "1.5.0"
|
||||
- python-version: 3.9
|
||||
torch: "1.5.1"
|
||||
- python-version: 3.9
|
||||
torch: "1.6.0"
|
||||
- python-version: 3.9
|
||||
torch: "1.7.0"
|
||||
|
||||
steps:
|
||||
# refer to https://github.com/actions/checkout
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Install CUDA Toolkit ${{ matrix.cuda }}
|
||||
shell: bash -l {0}
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
run: |
|
||||
source ./scripts/github_actions/install_cuda.sh
|
||||
echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV
|
||||
echo "${CUDA_HOME}/bin" >> $GITHUB_PATH
|
||||
echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
|
||||
- name: Display NVCC version
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
which nvcc
|
||||
nvcc --version
|
||||
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
with:
|
||||
auto-update-conda: true
|
||||
python-version: ${{ matrix.python-version }}
|
||||
activate-environment: kaldifeat
|
||||
|
||||
- name: Display Python version
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
python3 -c "import sys; print(sys.version)"
|
||||
which python3
|
||||
|
||||
- name: Install conda dependencies
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
conda install -y -q anaconda-client
|
||||
conda install -y -q conda-build
|
||||
conda install -y -q bs4 requests tqdm
|
||||
conda install -y -q -c pytorch -c conda-forge pytorch=${{ matrix.torch }} cudatoolkit=${{ matrix.cuda }}
|
||||
|
||||
- name: Display conda info
|
||||
shell: bash -l {0}
|
||||
run: |
|
||||
which conda
|
||||
conda env list
|
||||
conda info
|
||||
nproc
|
||||
|
||||
- name: Download cudnn 8.0
|
||||
shell: bash -l {0}
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
run: |
|
||||
./scripts/github_actions/install_cudnn.sh
|
||||
|
||||
- name: Build kaldifeat
|
||||
shell: bash -l {0}
|
||||
env:
|
||||
KALDIFEAT_CUDA_VERSION: ${{ matrix.cuda }}
|
||||
KALDIFEAT_PYTHON_VERSION: ${{ matrix.python-version}}
|
||||
KALDIFEAT_TORCH_VERSION: ${{ matrix.torch }}
|
||||
KALDIFEAT_CONDA_TOKEN: ${{ secrets.KALDIFEAT_CONDA_TOKEN}}
|
||||
KALDIFEAT_IS_GITHUB_ACTIONS: 1
|
||||
KALDIFEAT_IS_FOR_CONDA: 1
|
||||
run: |
|
||||
export KALDIFEAT_BUILD_TYPE=$KALDIFEAT_BUILD_TYPE
|
||||
./scripts/build_conda.sh
|
121
.github/workflows/macos-cpu-wheels.yml
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
name: build-wheels-cpu-macos
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
# - wheel
|
||||
- torch-2.8.0
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: build-wheels-cpu-macos-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# Run the generator once without capturing its output so the matrix is visible in the job log (debugging aid)
|
||||
# python ./scripts/github_actions/generate_build_matrix.py --for-macos
|
||||
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos)
|
||||
|
||||
python ./scripts/github_actions/generate_build_matrix.py --for-macos --test-only-latest-torch
|
||||
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos --test-only-latest-torch)
|
||||
echo "::set-output name=matrix::${MATRIX}"
|
||||
|
||||
build_wheels_macos_cpu:
|
||||
needs: generate_build_matrix
|
||||
name: ${{ matrix.torch }} ${{ matrix.python-version }}
|
||||
runs-on: macos-14
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
pip install -q torch==${{ matrix.torch}} cmake numpy wheel>=0.40.0 twine setuptools
|
||||
|
||||
- name: Build wheel
|
||||
shell: bash
|
||||
run: |
|
||||
python3 setup.py bdist_wheel
|
||||
mkdir wheelhouse
|
||||
cp -v dist/* wheelhouse
|
||||
|
||||
- name: Display wheels (before fix)
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh ./wheelhouse/
|
||||
|
||||
- name: Fix wheel platform tag
|
||||
run: |
|
||||
# See https://github.com/glencoesoftware/zeroc-ice-py-macos-x86_64/pull/3/files
|
||||
# See:
|
||||
# * https://github.com/pypa/wheel/issues/406
|
||||
python -m wheel tags \
|
||||
--platform-tag=macosx_11_0_arm64 \
|
||||
--remove wheelhouse/*.whl
|
||||
|
||||
- name: Display wheels (after fix)
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh ./wheelhouse/
|
||||
|
||||
- name: Upload Wheel
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-macos-latest-cpu
|
||||
path: wheelhouse/*.whl
|
||||
|
||||
# https://huggingface.co/docs/hub/spaces-github-actions
|
||||
- name: Publish to huggingface
|
||||
if: github.repository_owner == 'csukuangfj'
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v2
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
rm -rf huggingface
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
|
||||
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
|
||||
cd huggingface
|
||||
git pull
|
||||
|
||||
d=cpu/1.25.5.dev20241029/macos
|
||||
mkdir -p $d
|
||||
cp -v ../wheelhouse/*.whl ./$d
|
||||
git status
|
||||
git lfs track "*.whl"
|
||||
git add .
|
||||
git commit -m "upload macos wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
|
6
.github/workflows/publish_to_pypi.yml
vendored
@ -20,25 +20,27 @@ on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
pypi:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.6
|
||||
python-version: 3.8
|
||||
|
||||
- name: Install Python dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install wheel twine setuptools
|
||||
python3 -m pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
|
||||
- name: Build
|
||||
shell: bash
|
||||
|
121
.github/workflows/publish_to_pypi.yml-bak
vendored
@ -1,121 +0,0 @@
|
||||
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
|
||||
|
||||
# See ../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
name: Publish to PyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
|
||||
jobs:
|
||||
pypi:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-18.04, macos-10.15]
|
||||
cuda: ["10.1"]
|
||||
torch: ["1.7.1"]
|
||||
python-version: [3.6, 3.7, 3.8]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install GCC 7
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
run: |
|
||||
sudo apt-get install -y gcc-7 g++-7
|
||||
echo "CC=/usr/bin/gcc-7" >> $GITHUB_ENV
|
||||
echo "CXX=/usr/bin/g++-7" >> $GITHUB_ENV
|
||||
|
||||
|
||||
- name: Install CUDA Toolkit ${{ matrix.cuda }}
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
shell: bash
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
run: |
|
||||
source ./scripts/github_actions/install_cuda.sh
|
||||
echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV
|
||||
echo "${CUDA_HOME}/bin" >> $GITHUB_PATH
|
||||
echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
|
||||
- name: Display NVCC version
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
run: |
|
||||
which nvcc
|
||||
nvcc --version
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
torch: ${{ matrix.torch }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install wheel twine typing_extensions
|
||||
python3 -m pip install bs4 requests tqdm
|
||||
|
||||
./scripts/github_actions/install_torch.sh
|
||||
python3 -c "import torch; print('torch version:', torch.__version__)"
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
if: startsWith(matrix.os, 'macos')
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -qq --upgrade pip
|
||||
python3 -m pip install -qq wheel twine typing_extensions
|
||||
python3 -m pip install -qq torch==${{ matrix.torch }}
|
||||
|
||||
python3 -c "import torch; print('torch version:', torch.__version__)"
|
||||
|
||||
- name: Download cudnn 8.0
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
run: |
|
||||
./scripts/github_actions/install_cudnn.sh
|
||||
|
||||
- name: Build pip packages
|
||||
shell: bash
|
||||
env:
|
||||
KALDIFEAT_IS_FOR_PYPI: 1
|
||||
run: |
|
||||
tag=$(python3 -c "import sys; print(''.join(sys.version[:3].split('.')))")
|
||||
export KALDIFEAT_MAKE_ARGS="-j2"
|
||||
python3 setup.py bdist_wheel --python-tag=py${tag}
|
||||
ls -lh dist/
|
||||
|
||||
- name: Publish wheels to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||
run: |
|
||||
twine upload dist/kaldifeat-*.whl
|
||||
|
||||
- name: Upload Wheel
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-${{ matrix.os }}
|
||||
path: dist/*.whl
|
85
.github/workflows/run-tests-macos-cpu.yml
vendored
Normal file
@ -0,0 +1,85 @@
|
||||
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
|
||||
|
||||
# See ../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
name: Run tests macos cpu
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# Run the generator once without capturing its output so the matrix is visible in the job log (debugging aid)
|
||||
python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
|
||||
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
|
||||
echo "::set-output name=matrix::${MATRIX}"
|
||||
|
||||
run_tests_macos_cpu:
|
||||
needs: generate_build_matrix
|
||||
runs-on: macos-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -qq --upgrade pip
|
||||
python3 -m pip install -qq wheel twine typing_extensions soundfile numpy
|
||||
python3 -m pip install -qq torch==${{ matrix.torch }} -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }} -f https://download.pytorch.org/whl/torch/
|
||||
|
||||
python3 -c "import torch; print('torch version:', torch.__version__)"
|
||||
|
||||
- name: Build
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir build_release
|
||||
cd build_release
|
||||
cmake -DCMAKE_CXX_STANDARD=17 ..
|
||||
make VERBOSE=1 -j3
|
||||
|
||||
- name: Run tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
ctest --output-on-failure
|
88
.github/workflows/run-tests-ubuntu-cpu.yml
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
|
||||
|
||||
# See ../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
name: Run tests ubuntu cpu
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# Run the generator once without capturing its output so the matrix is visible in the job log (debugging aid)
|
||||
python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
|
||||
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
|
||||
echo "::set-output name=matrix::${MATRIX}"
|
||||
|
||||
run_tests_ubuntu_cpu:
|
||||
needs: generate_build_matrix
|
||||
runs-on: ubuntu-18.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install wheel twine typing_extensions soundfile
|
||||
python3 -m pip install bs4 requests tqdm numpy
|
||||
python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/
|
||||
|
||||
python3 -c "import torch; print('torch version:', torch.__version__)"
|
||||
|
||||
- name: Build
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir build_release
|
||||
cd build_release
|
||||
cmake -DCMAKE_CXX_STANDARD=17 ..
|
||||
make VERBOSE=1 -j3
|
||||
|
||||
- name: Run tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
ctest --output-on-failure
|
@ -14,28 +14,45 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
name: Run tests
|
||||
name: Run tests ubuntu cuda
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
run_tests:
|
||||
runs-on: ${{ matrix.os }}
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# Run the generator once without capturing its output so the matrix is visible in the job log (debugging aid)
|
||||
python scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch
|
||||
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch)
|
||||
echo "::set-output name=matrix::${MATRIX}"
|
||||
|
||||
run_tests_ubuntu_cuda:
|
||||
needs: generate_build_matrix
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-18.04, macos-10.15]
|
||||
cuda: ["10.1"]
|
||||
torch: ["1.7.1"]
|
||||
python-version: [3.6, 3.7, 3.8]
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@ -44,16 +61,7 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install GCC 7
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
run: |
|
||||
sudo apt-get install -y gcc-7 g++-7
|
||||
echo "CC=/usr/bin/gcc-7" >> $GITHUB_ENV
|
||||
echo "CXX=/usr/bin/g++-7" >> $GITHUB_ENV
|
||||
|
||||
|
||||
- name: Install CUDA Toolkit ${{ matrix.cuda }}
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
shell: bash
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
@ -64,39 +72,26 @@ jobs:
|
||||
echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
|
||||
- name: Display NVCC version
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
run: |
|
||||
which nvcc
|
||||
nvcc --version
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
torch: ${{ matrix.torch }}
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install libsndfile1-dev libsndfile1 ffmpeg
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install wheel twine typing_extensions soundfile
|
||||
python3 -m pip install bs4 requests tqdm
|
||||
python3 -m pip install bs4 requests tqdm numpy
|
||||
|
||||
./scripts/github_actions/install_torch.sh
|
||||
python3 -c "import torch; print('torch version:', torch.__version__)"
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
if: startsWith(matrix.os, 'macos')
|
||||
shell: bash
|
||||
run: |
|
||||
python3 -m pip install -qq --upgrade pip
|
||||
python3 -m pip install -qq wheel twine typing_extensions soundfile
|
||||
python3 -m pip install -qq torch==${{ matrix.torch }}
|
||||
|
||||
python3 -c "import torch; print('torch version:', torch.__version__)"
|
||||
|
||||
- name: Download cudnn 8.0
|
||||
if: startsWith(matrix.os, 'ubuntu')
|
||||
env:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
run: |
|
||||
@ -107,11 +102,11 @@ jobs:
|
||||
run: |
|
||||
mkdir build_release
|
||||
cd build_release
|
||||
cmake ..
|
||||
make VERBOSE=1
|
||||
cmake -DCMAKE_CXX_STANDARD=17 ..
|
||||
make VERBOSE=1 -j3
|
||||
|
||||
- name: Run tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
ctest -R py --output-on-failure
|
||||
ctest --output-on-failure
|
121
.github/workflows/run-tests-windows-cpu.yml
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
|
||||
|
||||
# See ../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
name: Run tests windows cpu
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# Run the generator once without capturing its output so the matrix is visible in the job log (debugging aid)
|
||||
python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
|
||||
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
|
||||
echo "::set-output name=matrix::${MATRIX}"
|
||||
|
||||
run_tests_windows_cpu:
|
||||
# see https://github.com/actions/virtual-environments/blob/win19/20210525.0/images/win/Windows2019-Readme.md
|
||||
needs: generate_build_matrix
|
||||
runs-on: windows-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# see https://github.com/microsoft/setup-msbuild
|
||||
- name: Add msbuild to PATH
|
||||
uses: microsoft/setup-msbuild@v1.0.2
|
||||
|
||||
- name: Setup Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Display Python version
|
||||
run: python -c "import sys; print(sys.version)"
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
run: |
|
||||
pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/
|
||||
pip3 install -qq wheel twine dataclasses numpy typing_extensions soundfile
|
||||
|
||||
- name: Display CMake version
|
||||
run: |
|
||||
cmake --version
|
||||
cmake --help
|
||||
|
||||
- name: Configure CMake
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir build_release
|
||||
cd build_release
|
||||
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
|
||||
ls -lh
|
||||
|
||||
- name: Build kaldifeat
|
||||
run: |
|
||||
cd build_release
|
||||
cmake --build -DCMAKE_CXX_STANDARD=17 . --target _kaldifeat --config Release
|
||||
|
||||
- name: Display generated files
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
ls -lh lib/*/*
|
||||
|
||||
- name: Build wheel
|
||||
shell: bash
|
||||
run: |
|
||||
python3 setup.py bdist_wheel
|
||||
ls -lh dist/
|
||||
pip install ./dist/*.whl
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
|
||||
- name: Upload Wheel
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: python-${{ matrix.python-version }}-${{ matrix.os }}-cpu
|
||||
path: dist/*.whl
|
||||
|
||||
- name: Build tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
cmake -DCMAKE_CXX_STANDARD=17 --build . --target ALL_BUILD --config Release
|
||||
ls -lh bin/*/*
|
||||
ctest -C Release --verbose --output-on-failure
|
173
.github/workflows/run-tests-windows-cuda.yml
vendored
Normal file
@ -0,0 +1,173 @@
|
||||
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
|
||||
|
||||
# See ../../LICENSE for clarification regarding multiple authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
name: Run tests windows cuda
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# outputting for debugging purposes
|
||||
python scripts/github_actions/generate_build_matrix.py --enable-cuda --for-windows --test-only-latest-torch
|
||||
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --enable-cuda --for-windows --test-only-latest-torch)
|
||||
# Publish the step output via the GITHUB_OUTPUT file; the ::set-output
# workflow command is deprecated.
echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
run_tests_windows_cuda:
|
||||
needs: generate_build_matrix
|
||||
runs-on: windows-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# see https://github.com/microsoft/setup-msbuild
|
||||
- name: Add msbuild to PATH
|
||||
uses: microsoft/setup-msbuild@v1.0.2
|
||||
|
||||
- name: Setup Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Display Python version
|
||||
run: python -c "import sys; print(sys.version)"
|
||||
|
||||
# See https://github.com/Jimver/cuda-toolkit/blob/master/src/links/windows-links.ts
|
||||
# for available CUDA versions
|
||||
- uses: Jimver/cuda-toolkit@v0.2.7
|
||||
id: cuda-toolkit
|
||||
with:
|
||||
cuda: ${{ matrix.cuda }}
|
||||
|
||||
- name: Display CUDA version
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Installed cuda version is: ${{ steps.cuda-toolkit.outputs.cuda }}"
|
||||
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
|
||||
nvcc --version
|
||||
|
||||
- name: Remove CUDA installation package
|
||||
shell: bash
|
||||
run: |
|
||||
rm "C:/hostedtoolcache/windows/cuda_installer-windows/${{ matrix.cuda }}/x64/cuda_installer_${{ matrix.cuda }}.exe"
|
||||
|
||||
- name: Download cuDNN
|
||||
shell: bash
|
||||
run: |
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/cudnn-for-windows
|
||||
cd cudnn-for-windows
|
||||
git lfs pull --include="cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip"
|
||||
unzip cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip
|
||||
rm cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip
|
||||
ls -lh *
|
||||
ls -lh */*
|
||||
|
||||
echo "PWD: $PWD"
|
||||
|
||||
- name: Install PyTorch ${{ matrix.torch }}
|
||||
shell: bash
|
||||
run: |
|
||||
version=${{ matrix.cuda }}
|
||||
major=${version:0:2}
|
||||
minor=${version:3:1}
|
||||
v=${major}${minor}
|
||||
if [ ${v} -eq 102 ]; then v=""; else v="+cu${v}"; fi
|
||||
|
||||
python3 -m pip install -qq --upgrade pip
|
||||
python3 -m pip install -qq wheel twine numpy typing_extensions
|
||||
python3 -m pip install -qq dataclasses soundfile numpy
|
||||
python3 -m pip install -qq torch==${{ matrix.torch }}${v} -f https://download.pytorch.org/whl/torch_stable.html numpy || python3 -m pip install -qq torch==${{ matrix.torch }}${v} -f https://download.pytorch.org/whl/torch/ numpy
|
||||
|
||||
python3 -c "import torch; print('torch version:', torch.__version__)"
|
||||
|
||||
python3 -m torch.utils.collect_env
|
||||
|
||||
- name: Display CMake version
|
||||
run: |
|
||||
cmake --version
|
||||
cmake --help
|
||||
|
||||
- name: Configure CMake
|
||||
shell: bash
|
||||
run: |
|
||||
echo "PWD: $PWD"
|
||||
ls -lh
|
||||
|
||||
mkdir build_release
|
||||
cd build_release
|
||||
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCUDNN_INCLUDE_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/include -DCUDNN_LIBRARY_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/lib/cudnn.lib ..
|
||||
ls -lh
|
||||
|
||||
- name: Build kaldifeat
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
cmake --build . --target _kaldifeat --config Release
|
||||
|
||||
- name: Display generated files
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
ls -lh lib/*/*
|
||||
|
||||
- name: Build wheel
|
||||
shell: bash
|
||||
run: |
|
||||
echo $PWD
|
||||
ls -lh ./*
|
||||
export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCUDNN_INCLUDE_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/include -DCUDNN_LIBRARY_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/lib/cudnn.lib"
|
||||
python3 setup.py bdist_wheel
|
||||
ls -lh dist/
|
||||
pip install ./dist/*.whl
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
|
||||
- name: Upload Wheel
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: python-${{ matrix.python-version }}-${{ matrix.os }}-cuda-${{ matrix.cuda }}
|
||||
path: dist/*.whl
|
||||
|
||||
- name: Build tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd build_release
|
||||
# --build must be the first argument for cmake to enter build mode; -D options
# are configure-time only and do not belong here.
cmake --build . --target ALL_BUILD --config Release
|
||||
ls -lh bin/*/*
|
||||
ctest -C Release --verbose --output-on-failure
|
10
.github/workflows/style_check.yml
vendored
@ -29,12 +29,12 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-18.04, macos-10.15]
|
||||
python-version: [3.7, 3.8, 3.9]
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
python-version: ["3.8"]
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@ -45,7 +45,9 @@ jobs:
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
python3 -m pip install --upgrade pip black flake8
|
||||
python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2 click==8.0.4
|
||||
# See https://github.com/psf/black/issues/2964
|
||||
# The version of click should be selected from 8.0.0, 8.0.1, 8.0.2, 8.0.3, and 8.0.4
|
||||
|
||||
- name: Run flake8
|
||||
shell: bash
|
||||
|
67
.github/workflows/test-wheels.yml
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
name: Test pre-compiled wheels
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
torch_version:
|
||||
description: "torch version, e.g., 2.0.1"
|
||||
required: true
|
||||
kaldifeat_version:
|
||||
description: "kaldifeat version, e.g., 1.25.0.dev20230726"
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
Test_pre_compiled_wheels:
|
||||
name: ${{ matrix.os }} ${{ github.event.inputs.torch_version }} ${{ github.event.inputs.kaldifeat_version }} ${{ matrix.python-version }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
python-version: ["3.8", "3.9", "3.10"]
|
||||
steps:
|
||||
# refer to https://github.com/actions/checkout
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Display Python version
|
||||
run: python -c "import sys; print(sys.version)"
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
pip install numpy
|
||||
|
||||
- name: Install torch
|
||||
if: startsWith(matrix.os, 'macos')
|
||||
shell: bash
|
||||
run: |
|
||||
pip install torch==${{ github.event.inputs.torch_version }}
|
||||
|
||||
- name: Install torch
|
||||
if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'windows')
|
||||
shell: bash
|
||||
run: |
|
||||
pip install torch==${{ github.event.inputs.torch_version }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || pip install torch==${{ github.event.inputs.torch_version }}+cpu -f https://download.pytorch.org/whl/torch/
|
||||
|
||||
- name: Install kaldifeat
|
||||
shell: bash
|
||||
run: |
|
||||
pip install kaldifeat==${{ github.event.inputs.kaldifeat_version }}+cpu.torch${{ github.event.inputs.torch_version }} -f https://csukuangfj.github.io/kaldifeat/cpu.html
|
||||
|
||||
- name: Run tests
|
||||
shell: bash
|
||||
run: |
|
||||
cd kaldifeat/python/tests
|
||||
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__file__)"
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
|
||||
python3 ./test_fbank_options.py
|
||||
|
||||
python3 ./test_mfcc_options.py
|
168
.github/workflows/ubuntu-arm64-cpu-wheels.yml
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
name: build-wheels-cpu-arm64-ubuntu
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
# - wheel
|
||||
- torch-2.8.0
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: build-wheels-cpu-arm64-ubuntu-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# outputting for debugging purposes
|
||||
# python ./scripts/github_actions/generate_build_matrix.py --for-arm64
|
||||
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-arm64)
|
||||
|
||||
python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch --for-arm64
|
||||
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch --for-arm64)
|
||||
# Publish the step output via the GITHUB_OUTPUT file; the ::set-output
# workflow command is deprecated.
echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
build-manylinux-wheels:
|
||||
needs: generate_build_matrix
|
||||
name: ${{ matrix.torch }} ${{ matrix.python-version }}
|
||||
runs-on: ubuntu-22.04-arm
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
|
||||
# https://github.com/pytorch/builder/tree/main/manywheel
|
||||
# https://github.com/pytorch/builder/pull/476
|
||||
# https://github.com/k2-fsa/k2/issues/733
|
||||
# https://github.com/pytorch/pytorch/pull/50633 (generate build matrix)
|
||||
- name: Run the build process with Docker
|
||||
uses: addnab/docker-run-action@v3
|
||||
with:
|
||||
image: ${{ matrix.image }}
|
||||
options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }}
|
||||
run: |
|
||||
echo "pwd: $PWD"
|
||||
uname -a
|
||||
id
|
||||
cat /etc/*release
|
||||
gcc --version
|
||||
python3 --version
|
||||
which python3
|
||||
|
||||
ls -lh /opt/python/
|
||||
|
||||
echo "---"
|
||||
|
||||
ls -lh /opt/python/cp*
|
||||
|
||||
ls -lh /opt/python/*/bin
|
||||
|
||||
echo "---"
|
||||
find /opt/python/cp* -name "libpython*"
|
||||
echo "-----"
|
||||
find /opt/_internal/cp* -name "libpython*"
|
||||
|
||||
echo "-----"
|
||||
find / -name "libpython*"
|
||||
echo "----"
|
||||
# Debug listing only: the file may be absent in some images, so never let it
# affect the exit status.
ls -lh /usr/lib64/libpython3.so || true
|
||||
|
||||
# cp36-cp36m
|
||||
# cp37-cp37m
|
||||
# cp38-cp38
|
||||
# cp39-cp39
|
||||
# cp310-cp310
|
||||
# cp311-cp311
|
||||
# cp312-cp312
|
||||
# cp313-cp313
|
||||
# cp313-cp313t (no gil)
|
||||
if [[ $PYTHON_VERSION == "3.6" ]]; then
|
||||
python_dir=/opt/python/cp36-cp36m
|
||||
export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.7" ]]; then
|
||||
python_dir=/opt/python/cp37-cp37m
|
||||
export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.8" ]]; then
|
||||
python_dir=/opt/python/cp38-cp38
|
||||
export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.9" ]]; then
|
||||
python_dir=/opt/python/cp39-cp39
|
||||
export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.10" ]]; then
|
||||
python_dir=/opt/python/cp310-cp310
|
||||
export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.11" ]]; then
|
||||
python_dir=/opt/python/cp311-cp311
|
||||
export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.12" ]]; then
|
||||
python_dir=/opt/python/cp312-cp312
|
||||
export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.13" ]]; then
|
||||
python_dir=/opt/python/cp313-cp313
|
||||
export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
|
||||
else
|
||||
echo "Unsupported Python version $PYTHON_VERSION"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export PYTHON_INSTALL_DIR=$python_dir
|
||||
export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
|
||||
|
||||
python3 --version
|
||||
which python3
|
||||
|
||||
/var/www/scripts/github_actions/build-ubuntu-cpu-arm64.sh
|
||||
|
||||
- name: Display wheels
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh ./wheelhouse/
|
||||
|
||||
# https://huggingface.co/docs/hub/spaces-github-actions
|
||||
- name: Publish to huggingface
|
||||
if: github.repository_owner == 'csukuangfj'
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v2
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
rm -rf huggingface
|
||||
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
|
||||
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
|
||||
cd huggingface
|
||||
git pull
|
||||
|
||||
d=cpu/1.25.5.dev20250307/linux-arm64
|
||||
mkdir -p $d
|
||||
cp -v ../wheelhouse/*.whl ./$d
|
||||
git status
|
||||
git lfs track "*.whl"
|
||||
git add .
|
||||
git commit -m "upload ubuntu-arm64-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
|
168
.github/workflows/ubuntu-cpu-wheels.yml
vendored
Normal file
@ -0,0 +1,168 @@
|
||||
name: build-wheels-cpu-ubuntu
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
# - wheel
|
||||
- torch-2.8.0
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: build-wheels-cpu-ubuntu-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# outputting for debugging purposes
|
||||
# python ./scripts/github_actions/generate_build_matrix.py
|
||||
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py)
|
||||
|
||||
python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
|
||||
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
|
||||
# Publish the step output via the GITHUB_OUTPUT file; the ::set-output
# workflow command is deprecated.
echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
build-manylinux-wheels:
|
||||
needs: generate_build_matrix
|
||||
name: ${{ matrix.torch }} ${{ matrix.python-version }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
|
||||
# https://github.com/pytorch/builder/tree/main/manywheel
|
||||
# https://github.com/pytorch/builder/pull/476
|
||||
# https://github.com/k2-fsa/k2/issues/733
|
||||
# https://github.com/pytorch/pytorch/pull/50633 (generate build matrix)
|
||||
- name: Run the build process with Docker
|
||||
uses: addnab/docker-run-action@v3
|
||||
with:
|
||||
image: ${{ matrix.image }}
|
||||
options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }}
|
||||
run: |
|
||||
echo "pwd: $PWD"
|
||||
uname -a
|
||||
id
|
||||
cat /etc/*release
|
||||
gcc --version
|
||||
python3 --version
|
||||
which python3
|
||||
|
||||
ls -lh /opt/python/
|
||||
|
||||
echo "---"
|
||||
|
||||
ls -lh /opt/python/cp*
|
||||
|
||||
ls -lh /opt/python/*/bin
|
||||
|
||||
echo "---"
|
||||
find /opt/python/cp* -name "libpython*"
|
||||
echo "-----"
|
||||
find /opt/_internal/cp* -name "libpython*"
|
||||
|
||||
echo "-----"
|
||||
find / -name "libpython*"
|
||||
echo "----"
|
||||
ls -lh /usr/lib64/libpython3.so || true
|
||||
|
||||
# cp36-cp36m
|
||||
# cp37-cp37m
|
||||
# cp38-cp38
|
||||
# cp39-cp39
|
||||
# cp310-cp310
|
||||
# cp311-cp311
|
||||
# cp312-cp312
|
||||
# cp313-cp313
|
||||
# cp313-cp313t (no gil)
|
||||
if [[ $PYTHON_VERSION == "3.6" ]]; then
|
||||
python_dir=/opt/python/cp36-cp36m
|
||||
export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.7" ]]; then
|
||||
python_dir=/opt/python/cp37-cp37m
|
||||
export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.8" ]]; then
|
||||
python_dir=/opt/python/cp38-cp38
|
||||
export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.9" ]]; then
|
||||
python_dir=/opt/python/cp39-cp39
|
||||
export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.10" ]]; then
|
||||
python_dir=/opt/python/cp310-cp310
|
||||
export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.11" ]]; then
|
||||
python_dir=/opt/python/cp311-cp311
|
||||
export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.12" ]]; then
|
||||
python_dir=/opt/python/cp312-cp312
|
||||
export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.13" ]]; then
|
||||
python_dir=/opt/python/cp313-cp313
|
||||
export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
|
||||
else
|
||||
echo "Unsupported Python version $PYTHON_VERSION"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export PYTHON_INSTALL_DIR=$python_dir
|
||||
export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
|
||||
|
||||
python3 --version
|
||||
which python3
|
||||
|
||||
/var/www/scripts/github_actions/build-ubuntu-cpu.sh
|
||||
|
||||
- name: Display wheels
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh ./wheelhouse/
|
||||
|
||||
# https://huggingface.co/docs/hub/spaces-github-actions
|
||||
- name: Publish to huggingface
|
||||
if: github.repository_owner == 'csukuangfj'
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v2
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
rm -rf huggingface
|
||||
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
|
||||
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
|
||||
cd huggingface
|
||||
git pull
|
||||
|
||||
d=cpu/1.25.5.dev20250307/linux-x64
|
||||
mkdir -p $d
|
||||
cp -v ../wheelhouse/*.whl ./$d
|
||||
git status
|
||||
git lfs track "*.whl"
|
||||
git add .
|
||||
git commit -m "upload ubuntu-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
|
194
.github/workflows/ubuntu-cuda-wheels.yml
vendored
Normal file
@ -0,0 +1,194 @@
|
||||
name: build-wheels-cuda-ubuntu
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- wheel
|
||||
# - torch-2.7.1
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: build-wheels-cuda-ubuntu-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# outputting for debugging purposes
|
||||
# python ./scripts/github_actions/generate_build_matrix.py --enable-cuda
|
||||
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --enable-cuda)
|
||||
|
||||
python ./scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch
|
||||
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch)
|
||||
# Publish the step output via the GITHUB_OUTPUT file; the ::set-output
# workflow command is deprecated.
echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
build-manylinux-wheels:
|
||||
needs: generate_build_matrix
|
||||
name: ${{ matrix.torch }} ${{ matrix.python-version }} cuda${{ matrix.cuda }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Free space
|
||||
shell: bash
|
||||
run: |
|
||||
df -h
|
||||
rm -rf /opt/hostedtoolcache
|
||||
df -h
|
||||
echo "pwd: $PWD"
|
||||
echo "github.workspace ${{ github.workspace }}"
|
||||
|
||||
# see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
|
||||
# https://github.com/pytorch/builder/tree/main/manywheel
|
||||
# https://github.com/pytorch/builder/pull/476
|
||||
# https://github.com/k2-fsa/k2/issues/733
|
||||
# https://github.com/pytorch/pytorch/pull/50633 (generate build matrix)
|
||||
- name: Run the build process with Docker
|
||||
uses: addnab/docker-run-action@v3
|
||||
with:
|
||||
image: ${{ matrix.image }}
|
||||
options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }} -e CUDA_VERSION=${{ matrix.cuda }}
|
||||
run: |
|
||||
echo "pwd: $PWD"
|
||||
uname -a
|
||||
id
|
||||
cat /etc/*release
|
||||
gcc --version
|
||||
python3 --version
|
||||
which python3
|
||||
|
||||
ls -lh /opt/python/
|
||||
|
||||
echo "---"
|
||||
|
||||
ls -lh /opt/python/cp*
|
||||
|
||||
ls -lh /opt/python/*/bin
|
||||
|
||||
echo "---"
|
||||
find /opt/python/cp* -name "libpython*"
|
||||
echo "-----"
|
||||
find /opt/_internal/cp* -name "libpython*"
|
||||
|
||||
echo "-----"
|
||||
find / -name "libpython*"
|
||||
|
||||
# cp36-cp36m
|
||||
# cp37-cp37m
|
||||
# cp38-cp38
|
||||
# cp39-cp39
|
||||
# cp310-cp310
|
||||
# cp311-cp311
|
||||
# cp312-cp312
|
||||
# cp313-cp313
|
||||
# cp313-cp313t (no gil)
|
||||
if [[ $PYTHON_VERSION == "3.6" ]]; then
|
||||
python_dir=/opt/python/cp36-cp36m
|
||||
export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.7" ]]; then
|
||||
python_dir=/opt/python/cp37-cp37m
|
||||
export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.8" ]]; then
|
||||
python_dir=/opt/python/cp38-cp38
|
||||
export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.9" ]]; then
|
||||
python_dir=/opt/python/cp39-cp39
|
||||
export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.10" ]]; then
|
||||
python_dir=/opt/python/cp310-cp310
|
||||
export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.11" ]]; then
|
||||
python_dir=/opt/python/cp311-cp311
|
||||
export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.12" ]]; then
|
||||
python_dir=/opt/python/cp312-cp312
|
||||
export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
|
||||
elif [[ $PYTHON_VERSION == "3.13" ]]; then
|
||||
python_dir=/opt/python/cp313-cp313
|
||||
export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
|
||||
else
|
||||
echo "Unsupported Python version $PYTHON_VERSION"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export PYTHON_INSTALL_DIR=$python_dir
|
||||
export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
|
||||
|
||||
# There are no libpython.so inside $PYTHON_INSTALL_DIR
|
||||
# since they are statically linked.
|
||||
|
||||
python3 --version
|
||||
which python3
|
||||
|
||||
pushd /usr/local
|
||||
rm cuda
|
||||
ln -s cuda-$CUDA_VERSION cuda
|
||||
popd
|
||||
which nvcc
|
||||
nvcc --version
|
||||
|
||||
cp /var/www/scripts/github_actions/install_torch.sh .
|
||||
chmod +x install_torch.sh
|
||||
|
||||
/var/www/scripts/github_actions/build-ubuntu-cuda.sh
|
||||
|
||||
- name: Display wheels
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh ./wheelhouse/
|
||||
|
||||
- name: Upload Wheel
|
||||
if: false
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cuda-is_2_28-${{ matrix.is_2_28 }}
|
||||
path: wheelhouse/*.whl
|
||||
|
||||
# https://huggingface.co/docs/hub/spaces-github-actions
|
||||
- name: Publish to huggingface
|
||||
if: github.repository_owner == 'csukuangfj'
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v2
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
rm -rf huggingface
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
|
||||
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
|
||||
cd huggingface
|
||||
git pull
|
||||
|
||||
d=cuda/1.25.5.dev20241029/linux
|
||||
mkdir -p $d
|
||||
cp -v ../wheelhouse/*.whl ./$d
|
||||
git status
|
||||
git lfs track "*.whl"
|
||||
git add .
|
||||
git commit -m "upload ubuntu-cuda wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
|
108
.github/workflows/windows-x64-cpu-wheels.yml
vendored
Normal file
@ -0,0 +1,108 @@
|
||||
name: build-wheels-cpu-win64
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
# - wheel
|
||||
- torch-2.8.0
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: build-wheels-cpu-win64-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
generate_build_matrix:
|
||||
# see https://github.com/pytorch/pytorch/pull/50633
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Generating build matrix
|
||||
id: set-matrix
|
||||
run: |
|
||||
# outputting for debugging purposes
|
||||
# python ./scripts/github_actions/generate_build_matrix.py --for-windows
|
||||
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows)
|
||||
|
||||
python ./scripts/github_actions/generate_build_matrix.py --for-windows --test-only-latest-torch
|
||||
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows --test-only-latest-torch)
|
||||
# Publish the step output via the GITHUB_OUTPUT file; the ::set-output
# workflow command is deprecated.
echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
build_wheels_win64_cpu:
|
||||
needs: generate_build_matrix
|
||||
name: ${{ matrix.torch }} ${{ matrix.python-version }}
|
||||
runs-on: windows-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
# The version specifier must be quoted: an unquoted ">=" is parsed by bash as
# an output redirection to a file named "=0.40.0", dropping the constraint.
pip install -q torch==${{ matrix.torch}} cmake numpy "wheel>=0.40.0" twine setuptools
|
||||
pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch_stable.html cmake numpy || pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch/ cmake numpy
|
||||
|
||||
- name: Build wheel
|
||||
shell: bash
|
||||
run: |
|
||||
python3 setup.py bdist_wheel
|
||||
mkdir wheelhouse
|
||||
cp -v dist/* wheelhouse
|
||||
|
||||
- name: Display wheels
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh ./wheelhouse/
|
||||
|
||||
- name: Upload Wheel
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-windows-latest-cpu
|
||||
path: wheelhouse/*.whl
|
||||
|
||||
# https://huggingface.co/docs/hub/spaces-github-actions
|
||||
- name: Publish to huggingface
|
||||
if: github.repository_owner == 'csukuangfj'
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v2
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
rm -rf huggingface
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
|
||||
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
|
||||
cd huggingface
|
||||
git pull
|
||||
|
||||
d=cpu/1.25.5.dev20241029/windows
|
||||
mkdir -p $d
|
||||
cp -v ../wheelhouse/*.whl ./$d
|
||||
git status
|
||||
git lfs track "*.whl"
|
||||
git add .
|
||||
git commit -m "upload windows-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
|
3
.gitignore
vendored
@ -5,3 +5,6 @@ dist/
|
||||
__pycache__/
|
||||
test-1hour.wav
|
||||
path.sh
|
||||
torch_version.py
|
||||
cpu*.html
|
||||
cuda*.html
|
||||
|
@ -1,4 +1,7 @@
|
||||
# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang)
|
||||
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
|
||||
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
|
||||
endif()
|
||||
|
||||
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
|
||||
|
||||
@ -6,7 +9,8 @@ project(kaldifeat)
|
||||
|
||||
# remember to change the version in
|
||||
# scripts/conda/kaldifeat/meta.yaml
|
||||
set(kaldifeat_VERSION "1.6")
|
||||
# scripts/conda-cpu/kaldifeat/meta.yaml
|
||||
set(kaldifeat_VERSION "1.25.5")
|
||||
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
|
||||
@ -15,32 +19,102 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
|
||||
set(CMAKE_SKIP_BUILD_RPATH FALSE)
|
||||
set(BUILD_RPATH_USE_ORIGIN TRUE)
|
||||
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
||||
set(CMAKE_INSTALL_RPATH "$ORIGIN")
|
||||
set(CMAKE_BUILD_RPATH "$ORIGIN")
|
||||
|
||||
|
||||
if(NOT APPLE)
|
||||
set(kaldifeat_rpath_origin "$ORIGIN")
|
||||
else()
|
||||
set(kaldifeat_rpath_origin "@loader_path")
|
||||
endif()
|
||||
|
||||
set(CMAKE_INSTALL_RPATH ${kaldifeat_rpath_origin})
|
||||
set(CMAKE_BUILD_RPATH ${kaldifeat_rpath_origin})
|
||||
|
||||
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
|
||||
if (NOT CMAKE_CXX_STANDARD)
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.")
|
||||
endif()
|
||||
message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
|
||||
option(BUILD_SHARED_LIBS "Whether to build shared libraries" ON)
|
||||
option(kaldifeat_BUILD_TESTS "Whether to build tests or not" OFF)
|
||||
option(kaldifeat_BUILD_PYMODULE "Whether to build python module or not" ON)
|
||||
|
||||
message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
|
||||
|
||||
option(BUILD_TESTS "Whether to build tests or not" ON)
|
||||
if(BUILD_SHARED_LIBS AND MSVC)
|
||||
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
endif()
|
||||
|
||||
include(pybind11)
|
||||
if(kaldifeat_BUILD_PYMODULE)
|
||||
include(pybind11)
|
||||
endif()
|
||||
# Prevent cmake from trying to link with a system-installed mkl, since we do not use it directly;
|
||||
# mkl libraries should be linked with pytorch already
|
||||
# ref: https://github.com/pytorch/pytorch/blob/master/cmake/public/mkl.cmake
|
||||
set(CMAKE_DISABLE_FIND_PACKAGE_MKL TRUE)
|
||||
include(torch)
|
||||
|
||||
if(BUILD_TESTS)
|
||||
if(kaldifeat_BUILD_TESTS)
|
||||
include(googletest)
|
||||
enable_testing()
|
||||
endif()
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
if(WIN32)
|
||||
# disable various warnings for MSVC
|
||||
# 4624: destructor was implicitly defined as deleted because a base class destructor is inaccessible or deleted
|
||||
set(disabled_warnings
|
||||
/wd4624
|
||||
)
|
||||
message(STATUS "Disabled warnings: ${disabled_warnings}")
|
||||
foreach(w IN LISTS disabled_warnings)
|
||||
string(APPEND CMAKE_CXX_FLAGS " ${w} ")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
||||
|
||||
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
|
||||
|
||||
add_subdirectory(kaldifeat)
|
||||
|
||||
# TORCH_VERSION is defined in cmake/torch.cmake
|
||||
configure_file(
|
||||
${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py.in
|
||||
${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py @ONLY
|
||||
)
|
||||
|
||||
configure_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfigVersion.cmake.in
|
||||
${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake
|
||||
@ONLY
|
||||
)
|
||||
|
||||
configure_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfig.cmake.in
|
||||
${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake
|
||||
@ONLY
|
||||
)
|
||||
|
||||
install(FILES
|
||||
${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake
|
||||
${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake
|
||||
DESTINATION share/cmake/kaldifeat
|
||||
)
|
||||
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py
|
||||
DESTINATION ./
|
||||
)
|
||||
|
@ -1,3 +1,5 @@
|
||||
include LICENSE
|
||||
include README.md
|
||||
include CMakeLists.txt
|
||||
exclude pyproject.toml
|
||||
recursive-include kaldifeat *.*
|
||||
|
160
README.md
@ -1,12 +1,33 @@
|
||||
# kaldifeat
|
||||
|
||||
<div align="center">
|
||||
<img src="/doc/source/images/os.svg">
|
||||
<img src="/doc/source/images/python-3.6_3.7_3.8-blue.svg">
|
||||
<img src="/doc/source/images/pytorch-1.6.0_1.7.0_1.7.1_1.8.0_1.8.1_1.9.0-green.svg">
|
||||
<img src="/doc/source/images/cuda-10.1_10.2_11.0_11.1-orange.svg">
|
||||
<img src="/doc/source/images/os-green.svg">
|
||||
<img src="/doc/source/images/python_ge_3.6-blue.svg">
|
||||
<img src="/doc/source/images/pytorch_ge_1.5.0-green.svg">
|
||||
<img src="/doc/source/images/cuda_ge_10.1-orange.svg">
|
||||
</div>
|
||||
|
||||
[](https://csukuangfj.github.io/kaldifeat/)
|
||||
|
||||
**Documentation**: <https://csukuangfj.github.io/kaldifeat>
|
||||
|
||||
**Note**: If you are looking for a version that does not depend on PyTorch,
|
||||
please see <https://github.com/csukuangfj/kaldi-native-fbank>
|
||||
|
||||
# Installation
|
||||
|
||||
Refer to
|
||||
<https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html>
|
||||
for installation.
|
||||
|
||||
> Never use `pip install kaldifeat`
|
||||
|
||||
> Never use `pip install kaldifeat`
|
||||
|
||||
> Never use `pip install kaldifeat`
|
||||
|
||||
|
||||
|
||||
<sub>
|
||||
<table>
|
||||
<tr>
|
||||
@ -16,6 +37,36 @@
|
||||
<th>Usage</th>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Fbank for <a href="https://github.com/openai/whisper">Whisper</a></td>
|
||||
<td><code>kaldifeat.WhisperFbankOptions</code></td>
|
||||
<td><code>kaldifeat.WhisperFbank</code></td>
|
||||
<td>
|
||||
<pre lang="python">
|
||||
opts = kaldifeat.WhisperFbankOptions()
|
||||
opts.device = torch.device('cuda', 0)
|
||||
fbank = kaldifeat.WhisperFbank(opts)
|
||||
features = fbank(wave)
|
||||
</pre>
|
||||
See <a href="https://github.com/csukuangfj/kaldifeat/pull/82">#82</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Fbank for <a href="https://github.com/openai/whisper">Whisper-V3</a></td>
|
||||
<td><code>kaldifeat.WhisperFbankOptions</code></td>
|
||||
<td><code>kaldifeat.WhisperFbank</code></td>
|
||||
<td>
|
||||
<pre lang="python">
|
||||
opts = kaldifeat.WhisperFbankOptions()
|
||||
opts.num_mels = 128
|
||||
opts.device = torch.device('cuda', 0)
|
||||
fbank = kaldifeat.WhisperFbank(opts)
|
||||
features = fbank(wave)
|
||||
</pre>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>FBANK</td>
|
||||
<td><code>kaldifeat.FbankOptions</code></td>
|
||||
@ -31,6 +82,17 @@ features = fbank(wave)
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Streaming FBANK</td>
|
||||
<td><code>kaldifeat.FbankOptions</code></td>
|
||||
<td><code>kaldifeat.OnlineFbank</code></td>
|
||||
<td>
|
||||
See <a href="./kaldifeat/python/tests/test_fbank.py">
|
||||
./kaldifeat/python/tests/test_fbank.py
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>MFCC</td>
|
||||
<td><code>kaldifeat.MfccOptions</code></td>
|
||||
@ -45,6 +107,17 @@ features = mfcc(wave)
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Streaming MFCC</td>
|
||||
<td><code>kaldifeat.MfccOptions</code></td>
|
||||
<td><code>kaldifeat.OnlineMfcc</code></td>
|
||||
<td>
|
||||
See <a href="./kaldifeat/python/tests/test_mfcc.py">
|
||||
./kaldifeat/python/tests/test_mfcc.py
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>PLP</td>
|
||||
<td><code>kaldifeat.PlpOptions</code></td>
|
||||
@ -59,6 +132,17 @@ features = plp(wave)
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Streaming PLP</td>
|
||||
<td><code>kaldifeat.PlpOptions</code></td>
|
||||
<td><code>kaldifeat.OnlinePlp</code></td>
|
||||
<td>
|
||||
See <a href="./kaldifeat/python/tests/test_plp.py">
|
||||
./kaldifeat/python/tests/test_plp.py
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Spectrogram</td>
|
||||
<td><code>kaldifeat.SpectrogramOptions</code></td>
|
||||
@ -88,6 +172,8 @@ The following kaldi-compatible commandline tools are implemented:
|
||||
|
||||
(**NOTE**: We will implement other types of features, e.g., Pitch, ivector, etc., soon.)
|
||||
|
||||
**HINT**: It also supports streaming feature extractors for FBANK, MFCC, and PLP.
|
||||
|
||||
# Usage
|
||||
|
||||
Let us first generate a test wave using sox:
|
||||
@ -199,7 +285,12 @@ Please refer to
|
||||
- [kaldifeat/python/tests/test_mfcc.py](kaldifeat/python/tests/test_mfcc.py)
|
||||
- [kaldifeat/python/tests/test_plp.py](kaldifeat/python/tests/test_plp.py)
|
||||
- [kaldifeat/python/tests/test_spectrogram.py](kaldifeat/python/tests/test_spectrogram.py)
|
||||
- [kaldifeat/python/tests/test_options.py](kaldifeat/python/tests/test_options.py)
|
||||
- [kaldifeat/python/tests/test_frame_extraction_options.py](kaldifeat/python/tests/test_frame_extraction_options.py)
|
||||
- [kaldifeat/python/tests/test_mel_bank_options.py](kaldifeat/python/tests/test_mel_bank_options.py)
|
||||
- [kaldifeat/python/tests/test_fbank_options.py](kaldifeat/python/tests/test_fbank_options.py)
|
||||
- [kaldifeat/python/tests/test_mfcc_options.py](kaldifeat/python/tests/test_mfcc_options.py)
|
||||
- [kaldifeat/python/tests/test_spectrogram_options.py](kaldifeat/python/tests/test_spectrogram_options.py)
|
||||
- [kaldifeat/python/tests/test_plp_options.py](kaldifeat/python/tests/test_plp_options.py)
|
||||
|
||||
for more examples.
|
||||
|
||||
@ -208,48 +299,31 @@ for more examples.
|
||||
- ``kaldifeat`` supports batch processing as well as chunk processing
|
||||
- ``kaldifeat`` uses the same options as `Kaldi`'s `compute-fbank-feats` and `compute-mfcc-feats`
|
||||
|
||||
# Installation
|
||||
# Usage in other projects
|
||||
|
||||
## From conda
|
||||
## icefall
|
||||
|
||||
Supported versions of Python, PyTorch, and CUDA toolkit are listed below:
|
||||
[icefall](https://github.com/k2-fsa/icefall) uses kaldifeat to extract features for a pre-trained model.
|
||||
|
||||
[](/doc/source/images/python-3.6_3.7_3.8-blue.svg)
|
||||
[](/doc/source/images/pytorch-1.6.0_1.7.0_1.7.1_1.8.0_1.8.1_1.9.0-green.svg)
|
||||
[](/doc/source/images/cuda-10.1_10.2_11.0_11.1-orange.svg)
|
||||
See <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/conformer_ctc/pretrained.py>.
|
||||
|
||||
```bash
|
||||
conda install -c kaldifeat -c pytorch -c conda-forge kaldifeat python=3.8 cudatoolkit=11.1 pytorch=1.8.1
|
||||
```
|
||||
## k2
|
||||
|
||||
You can select the supported Python version, CUDA toolkit version and PyTorch version as you wish.
|
||||
[k2](https://github.com/k2-fsa/k2) uses kaldifeat's C++ API.
|
||||
|
||||
See <https://github.com/k2-fsa/k2/blob/v2.0-pre/k2/torch/csrc/features.cu>.
|
||||
|
||||
## lhotse
|
||||
|
||||
[lhotse](https://github.com/lhotse-speech/lhotse) uses kaldifeat to extract features on GPU.
|
||||
|
||||
See <https://github.com/lhotse-speech/lhotse/blob/master/lhotse/features/kaldifeat.py>.
|
||||
|
||||
## sherpa
|
||||
|
||||
[sherpa](https://github.com/k2-fsa/sherpa) uses kaldifeat for streaming feature
|
||||
extraction.
|
||||
|
||||
See <https://github.com/k2-fsa/sherpa/blob/master/sherpa/bin/pruned_stateless_emformer_rnnt2/decode.py>
|
||||
|
||||
|
||||
## From PyPi with pip
|
||||
|
||||
You need to install PyTorch and CMake first.
|
||||
cmake 3.11 is known to work. Other cmake versions may also work.
|
||||
PyTorch 1.6.0 and above are known to work. Other PyTorch versions may also work.
|
||||
|
||||
```bash
|
||||
pip install -v kaldilm
|
||||
```
|
||||
|
||||
## From source
|
||||
|
||||
The following are the commands to compile `kaldifeat` from source.
|
||||
We assume that you have installed `cmake` and PyTorch.
|
||||
cmake 3.11 is known to work. Other cmake versions may also work.
|
||||
PyTorch 1.6.0 and above are known to work. Other PyTorch versions may also work.
|
||||
|
||||
```bash
|
||||
mkdir /some/path
|
||||
git clone https://github.com/csukuangfj/kaldifeat.git
|
||||
cd kaldifeat
|
||||
python setup.py install
|
||||
```
|
||||
|
||||
To test whether `kaldifeat` was installed successfully, you can run:
|
||||
```bash
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
```
|
||||
|
@ -8,9 +8,15 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
import setuptools
|
||||
import torch
|
||||
from setuptools.command.build_ext import build_ext
|
||||
|
||||
|
||||
def get_pytorch_version():
|
||||
# if it is 1.7.1+cuda101, then strip +cuda101
|
||||
return torch.__version__.split("+")[0]
|
||||
|
||||
|
||||
def is_for_pypi():
|
||||
ans = os.environ.get("KALDIFEAT_IS_FOR_PYPI", None)
|
||||
return ans is not None
|
||||
@ -20,6 +26,10 @@ def is_macos():
|
||||
return platform.system() == "Darwin"
|
||||
|
||||
|
||||
def is_windows():
|
||||
return platform.system() == "Windows"
|
||||
|
||||
|
||||
try:
|
||||
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
|
||||
|
||||
@ -35,7 +45,6 @@ try:
|
||||
# -linux_x86_64.whl
|
||||
self.root_is_pure = False
|
||||
|
||||
|
||||
except ImportError:
|
||||
bdist_wheel = None
|
||||
|
||||
@ -63,34 +72,67 @@ class BuildExtension(build_ext):
|
||||
if cmake_args == "":
|
||||
cmake_args = "-DCMAKE_BUILD_TYPE=Release"
|
||||
|
||||
if make_args == "" and system_make_args == "":
|
||||
print("For fast compilation, run:")
|
||||
print('export KALDIFEAT_MAKE_ARGS="-j"; python setup.py install')
|
||||
extra_cmake_args = " -Dkaldifeat_BUILD_TESTS=OFF "
|
||||
extra_cmake_args += f" -DCMAKE_INSTALL_PREFIX={Path(self.build_lib).resolve()}/kaldifeat " # noqa
|
||||
|
||||
major, minor = get_pytorch_version().split(".")[:2]
|
||||
print("major, minor", major, minor)
|
||||
major = int(major)
|
||||
minor = int(minor)
|
||||
if major > 2 or (major == 2 and minor >= 1):
|
||||
extra_cmake_args += f" -DCMAKE_CXX_STANDARD=17 "
|
||||
|
||||
if "PYTHON_EXECUTABLE" not in cmake_args:
|
||||
print(f"Setting PYTHON_EXECUTABLE to {sys.executable}")
|
||||
cmake_args += f" -DPYTHON_EXECUTABLE={sys.executable}"
|
||||
|
||||
build_cmd = f"""
|
||||
cd {self.build_temp}
|
||||
cmake_args += extra_cmake_args
|
||||
|
||||
cmake {cmake_args} {kaldifeat_dir}
|
||||
|
||||
|
||||
make {make_args} _kaldifeat
|
||||
"""
|
||||
print(f"build command is:\n{build_cmd}")
|
||||
|
||||
ret = os.system(build_cmd)
|
||||
if ret != 0:
|
||||
raise Exception(
|
||||
"\nBuild kaldifeat failed. Please check the error message.\n"
|
||||
"You can ask for help by creating an issue on GitHub.\n"
|
||||
"\nClick:\n\thttps://github.com/csukuangfj/kaldifeat/issues/new\n" # noqa
|
||||
if is_windows():
|
||||
build_cmd = f"""
|
||||
cmake {cmake_args} -B {self.build_temp} -S {kaldifeat_dir}
|
||||
cmake --build {self.build_temp} --target _kaldifeat --config Release -- -m
|
||||
cmake --build {self.build_temp} --target install --config Release -- -m
|
||||
"""
|
||||
print(f"build command is:\n{build_cmd}")
|
||||
ret = os.system(
|
||||
f"cmake {cmake_args} -B {self.build_temp} -S {kaldifeat_dir}"
|
||||
)
|
||||
if ret != 0:
|
||||
raise Exception("Failed to configure kaldifeat")
|
||||
|
||||
lib_so = glob.glob(f"{self.build_temp}/lib/*kaldifeat*.so")
|
||||
lib_so += glob.glob(f"{self.build_temp}/lib/*kaldifeat*.dylib") # macOS
|
||||
for so in lib_so:
|
||||
print(f"Copying {so} to {self.build_lib}/")
|
||||
shutil.copy(f"{so}", f"{self.build_lib}/")
|
||||
ret = os.system(
|
||||
f"cmake --build {self.build_temp} --target _kaldifeat --config Release -- -m"
|
||||
)
|
||||
if ret != 0:
|
||||
raise Exception("Failed to build kaldifeat")
|
||||
|
||||
ret = os.system(
|
||||
f"cmake --build {self.build_temp} --target install --config Release -- -m"
|
||||
)
|
||||
if ret != 0:
|
||||
raise Exception("Failed to install kaldifeat")
|
||||
else:
|
||||
if make_args == "" and system_make_args == "":
|
||||
print("For fast compilation, run:")
|
||||
print('export KALDIFEAT_MAKE_ARGS="-j"; python setup.py install')
|
||||
make_args = " -j4 "
|
||||
print("Setting make_args to '-j4'")
|
||||
|
||||
build_cmd = f"""
|
||||
cd {self.build_temp}
|
||||
|
||||
cmake {cmake_args} {kaldifeat_dir}
|
||||
|
||||
|
||||
make {make_args} _kaldifeat install
|
||||
"""
|
||||
print(f"build command is:\n{build_cmd}")
|
||||
|
||||
ret = os.system(build_cmd)
|
||||
if ret != 0:
|
||||
raise Exception(
|
||||
"\nBuild kaldifeat failed. Please check the error message.\n"
|
||||
"You can ask for help by creating an issue on GitHub.\n"
|
||||
"\nClick:\n\thttps://github.com/csukuangfj/kaldifeat/issues/new\n" # noqa
|
||||
)
|
||||
|
@ -24,8 +24,28 @@ function(download_googltest)
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
set(googletest_URL "https://github.com/google/googletest/archive/release-1.10.0.tar.gz")
|
||||
set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb")
|
||||
set(googletest_URL "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
|
||||
set(googletest_URL2 "https://huggingface.co/csukuangfj/k2-cmake-deps/resolve/main/googletest-1.13.0.tar.gz")
|
||||
set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363")
|
||||
|
||||
# If you don't have access to the Internet,
|
||||
# please pre-download googletest
|
||||
set(possible_file_locations
|
||||
$ENV{HOME}/Downloads/googletest-1.13.0.tar.gz
|
||||
${PROJECT_SOURCE_DIR}/googletest-1.13.0.tar.gz
|
||||
${PROJECT_BINARY_DIR}/googletest-1.13.0.tar.gz
|
||||
/tmp/googletest-1.13.0.tar.gz
|
||||
/star-fj/fangjun/download/github/googletest-1.13.0.tar.gz
|
||||
)
|
||||
|
||||
foreach(f IN LISTS possible_file_locations)
|
||||
if(EXISTS ${f})
|
||||
set(googletest_URL "${f}")
|
||||
file(TO_CMAKE_PATH "${googletest_URL}" googletest_URL)
|
||||
set(googletest_URL2)
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
|
||||
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
|
||||
@ -33,13 +53,15 @@ function(download_googltest)
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
|
||||
FetchContent_Declare(googletest
|
||||
URL ${googletest_URL}
|
||||
URL
|
||||
${googletest_URL}
|
||||
${googletest_URL2}
|
||||
URL_HASH ${googletest_HASH}
|
||||
)
|
||||
|
||||
FetchContent_GetProperties(googletest)
|
||||
if(NOT googletest_POPULATED)
|
||||
message(STATUS "Downloading googletest")
|
||||
message(STATUS "Downloading googletest from ${googletest_URL}")
|
||||
FetchContent_Populate(googletest)
|
||||
endif()
|
||||
message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")
|
||||
|
65
cmake/kaldifeatConfig.cmake.in
Normal file
@ -0,0 +1,65 @@
|
||||
# Findkaldifeat
|
||||
# -------------
|
||||
#
|
||||
# Finds the kaldifeat library
|
||||
#
|
||||
# This will define the following variables:
|
||||
#
|
||||
# KALDIFEAT_FOUND -- True if the system has the kaldifeat library
|
||||
# KALDIFEAT_INCLUDE_DIRS -- The include directories for kaldifeat
|
||||
# KALDIFEAT_LIBRARIES -- Libraries to link against
|
||||
# KALDIFEAT_CXX_FLAGS -- Additional (required) compiler flags
|
||||
# KALDIFEAT_TORCH_VERSION_MAJOR -- The major version of PyTorch used to compile kaldifeat
|
||||
# KALDIFEAT_TORCH_VERSION_MINOR -- The minor version of PyTorch used to compile kaldifeat
|
||||
# KALDIFEAT_VERSION -- The version of kaldifeat
|
||||
#
|
||||
# and the following imported targets:
|
||||
#
|
||||
# kaldifeat_core
|
||||
|
||||
# This file is modified from pytorch/cmake/TorchConfig.cmake.in
|
||||
|
||||
set(KALDIFEAT_CXX_FLAGS "@CMAKE_CXX_FLAGS@")
|
||||
set(KALDIFEAT_TORCH_VERSION_MAJOR @KALDIFEAT_TORCH_VERSION_MAJOR@)
|
||||
set(KALDIFEAT_TORCH_VERSION_MINOR @KALDIFEAT_TORCH_VERSION_MINOR@)
|
||||
set(KALDIFEAT_VERSION @kaldifeat_VERSION@)
|
||||
|
||||
if(DEFINED ENV{KALDIFEAT_INSTALL_PREFIX})
|
||||
set(KALDIFEAT_INSTALL_PREFIX $ENV{KALDIFEAT_INSTALL_PREFIX})
|
||||
else()
|
||||
# Assume we are in <install-prefix>/share/cmake/kaldifeat/kaldifeatConfig.cmake
|
||||
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||
get_filename_component(KALDIFEAT_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
|
||||
endif()
|
||||
|
||||
set(KALDIFEAT_INCLUDE_DIRS ${KALDIFEAT_INSTALL_PREFIX}/include)
|
||||
|
||||
set(KALDIFEAT_LIBRARIES kaldifeat_core)
|
||||
|
||||
foreach(lib IN LISTS KALDIFEAT_LIBRARIES)
|
||||
find_library(location_${lib} ${lib}
|
||||
PATHS
|
||||
"${KALDIFEAT_INSTALL_PREFIX}/lib"
|
||||
"${KALDIFEAT_INSTALL_PREFIX}/lib64"
|
||||
)
|
||||
|
||||
if(NOT MSVC)
|
||||
add_library(${lib} SHARED IMPORTED)
|
||||
else()
|
||||
add_library(${lib} STATIC IMPORTED)
|
||||
endif()
|
||||
|
||||
set_target_properties(${lib} PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${KALDIFEAT_INCLUDE_DIRS}"
|
||||
IMPORTED_LOCATION "${location_${lib}}"
|
||||
CXX_STANDARD 14
|
||||
)
|
||||
|
||||
set_property(TARGET ${lib} PROPERTY INTERFACE_COMPILE_OPTIONS @CMAKE_CXX_FLAGS@)
|
||||
endforeach()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
find_package_handle_standard_args(kaldifeat DEFAULT_MSG
|
||||
location_kaldifeat_core
|
||||
)
|
12
cmake/kaldifeatConfigVersion.cmake.in
Normal file
@ -0,0 +1,12 @@
|
||||
# This file is modified from pytorch/cmake/TorchConfigVersion.cmake.in
|
||||
set(PACKAGE_VERSION "@kaldifeat_VERSION@")
|
||||
|
||||
# Check whether the requested PACKAGE_FIND_VERSION is compatible
|
||||
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
|
||||
set(PACKAGE_VERSION_COMPATIBLE FALSE)
|
||||
else()
|
||||
set(PACKAGE_VERSION_COMPATIBLE TRUE)
|
||||
if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
|
||||
set(PACKAGE_VERSION_EXACT TRUE)
|
||||
endif()
|
||||
endif()
|
@ -8,20 +8,39 @@ function(download_pybind11)
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
set(pybind11_URL "https://github.com/pybind/pybind11/archive/v2.6.0.tar.gz")
|
||||
set(pybind11_HASH "SHA256=90b705137b69ee3b5fc655eaca66d0dc9862ea1759226f7ccd3098425ae69571")
|
||||
set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.tar.gz")
|
||||
set(pybind11_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pybind11-2.12.0.tar.gz")
|
||||
set(pybind11_HASH "SHA256=bf8f242abd1abcd375d516a7067490fb71abd79519a282d22b6e4d19282185a7")
|
||||
|
||||
# If you don't have access to the Internet,
|
||||
# please pre-download pybind11
|
||||
set(possible_file_locations
|
||||
$ENV{HOME}/Downloads/pybind11-2.12.0.tar.gz
|
||||
${CMAKE_SOURCE_DIR}/pybind11-2.12.0.tar.gz
|
||||
${CMAKE_BINARY_DIR}/pybind11-2.12.0.tar.gz
|
||||
/tmp/pybind11-2.12.0.tar.gz
|
||||
/star-fj/fangjun/download/github/pybind11-2.12.0.tar.gz
|
||||
)
|
||||
|
||||
foreach(f IN LISTS possible_file_locations)
|
||||
if(EXISTS ${f})
|
||||
set(pybind11_URL "${f}")
|
||||
file(TO_CMAKE_PATH "${pybind11_URL}" pybind11_URL)
|
||||
set(pybind11_URL2)
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(double_quotes "\"")
|
||||
set(dollar "\$")
|
||||
set(semicolon "\;")
|
||||
FetchContent_Declare(pybind11
|
||||
URL ${pybind11_URL}
|
||||
URL
|
||||
${pybind11_URL}
|
||||
${pybind11_URL2}
|
||||
URL_HASH ${pybind11_HASH}
|
||||
)
|
||||
|
||||
FetchContent_GetProperties(pybind11)
|
||||
if(NOT pybind11_POPULATED)
|
||||
message(STATUS "Downloading pybind11")
|
||||
message(STATUS "Downloading pybind11 from ${pybind11_URL}")
|
||||
FetchContent_Populate(pybind11)
|
||||
endif()
|
||||
message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}")
|
||||
|
@ -8,6 +8,7 @@ execute_process(
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
OUTPUT_VARIABLE TORCH_DIR
|
||||
)
|
||||
message(STATUS "TORCH_DIR: ${TORCH_DIR}")
|
||||
|
||||
list(APPEND CMAKE_PREFIX_PATH "${TORCH_DIR}")
|
||||
find_package(Torch REQUIRED)
|
||||
|
@ -1,6 +1,6 @@
|
||||
dataclasses
|
||||
recommonmark
|
||||
sphinx
|
||||
sphinx<7.0
|
||||
sphinx-autodoc-typehints
|
||||
sphinx_rtd_theme
|
||||
sphinxcontrib-bibtex
|
||||
|
0
doc/source/_static/.gitkeep
Normal file
@ -1,72 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
|
||||
import kaldifeat
|
||||
|
||||
|
||||
def read_wave(filename) -> torch.Tensor:
|
||||
"""Read a wave file and return it as a 1-D tensor.
|
||||
|
||||
Note:
|
||||
You don't need to scale it to [-32768, 32767].
|
||||
We use scaling here to follow the approach in Kaldi.
|
||||
|
||||
Args:
|
||||
filename:
|
||||
Filename of a sound file.
|
||||
Returns:
|
||||
Return a 1-D tensor containing audio samples.
|
||||
"""
|
||||
with sf.SoundFile(filename) as sf_desc:
|
||||
sampling_rate = sf_desc.samplerate
|
||||
assert sampling_rate == 16000
|
||||
data = sf_desc.read(dtype=np.float32, always_2d=False)
|
||||
data *= 32768
|
||||
return torch.from_numpy(data)
|
||||
|
||||
|
||||
def test_fbank():
|
||||
device = torch.device("cpu")
|
||||
if torch.cuda.is_available():
|
||||
device = torch.device("cuda", 0)
|
||||
|
||||
wave0 = read_wave("test_data/test.wav")
|
||||
wave1 = read_wave("test_data/test2.wav")
|
||||
|
||||
wave0 = wave0.to(device)
|
||||
wave1 = wave1.to(device)
|
||||
|
||||
opts = kaldifeat.FbankOptions()
|
||||
opts.frame_opts.dither = 0
|
||||
opts.device = device
|
||||
|
||||
fbank = kaldifeat.Fbank(opts)
|
||||
|
||||
# We can compute fbank features in batches
|
||||
features = fbank([wave0, wave1])
|
||||
assert isinstance(features, list), f"{type(features)}"
|
||||
assert len(features) == 2
|
||||
|
||||
# We can also compute fbank features for a single wave
|
||||
features0 = fbank(wave0)
|
||||
features1 = fbank(wave1)
|
||||
|
||||
assert torch.allclose(features[0], features0)
|
||||
assert torch.allclose(features[1], features1)
|
||||
|
||||
# To compute fbank features for only a specified frame
|
||||
audio_frames = fbank.convert_samples_to_frames(wave0)
|
||||
feature_frame_1 = fbank.compute(audio_frames[1])
|
||||
feature_frame_10 = fbank.compute(audio_frames[10])
|
||||
|
||||
assert torch.allclose(features0[1], feature_frame_1)
|
||||
assert torch.allclose(features0[10], feature_frame_10)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_fbank()
|
@ -59,7 +59,7 @@ templates_path = ["_templates"]
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = []
|
||||
exclude_patterns = ["images/*.md"]
|
||||
|
||||
source_suffix = {
|
||||
".rst": "restructuredtext",
|
||||
@ -102,3 +102,35 @@ html_theme_options = {
|
||||
"prev_next_buttons_location": "bottom",
|
||||
"style_external_links": True,
|
||||
}
|
||||
|
||||
rst_epilog = """
|
||||
.. _kaldifeat: https://github.com/csukuangfj/kaldifeat
|
||||
.. _Kaldi: https://github.com/kaldi-asr/kaldi
|
||||
.. _PyTorch: https://pytorch.org/
|
||||
.. _kaldifeat.Fbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L10
|
||||
.. _kaldifeat.Mfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L10
|
||||
.. _kaldifeat.Plp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L10
|
||||
.. _kaldifeat.Spectrogram: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/spectrogram.py#L9
|
||||
.. _kaldifeat.OnlineFbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L16
|
||||
.. _kaldifeat.OnlineMfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L16
|
||||
.. _kaldifeat.OnlinePlp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L16
|
||||
.. _compute-fbank-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-fbank-feats.cc
|
||||
.. _compute-mfcc-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-mfcc-feats.cc
|
||||
.. _compute-plp-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-plp-feats.cc
|
||||
.. _compute-spectrogram-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-spectrogram-feats.cc
|
||||
.. _kaldi::OnlineFbank: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L160
|
||||
.. _kaldi::OnlineMfcc: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L158
|
||||
.. _kaldi::OnlinePlp: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L159
|
||||
.. _kaldifeat.FbankOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-fbank.h#L19
|
||||
.. _kaldi::FbankOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.h#L41
|
||||
.. _kaldifeat.MfccOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-mfcc.h#L22
|
||||
.. _kaldi::MfccOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-mfcc.h#L38
|
||||
.. _kaldifeat.PlpOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-plp.h#L24
|
||||
.. _kaldi::PlpOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-plp.h#L42
|
||||
.. _kaldifeat.SpectrogramOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-spectrogram.h#L18
|
||||
.. _kaldi::SpectrogramOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-spectrogram.h#L38
|
||||
.. _kaldifeat.FrameExtractionOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-window.h#L30
|
||||
.. _kaldi::FrameExtractionOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.h#L35
|
||||
.. _kaldifeat.MelBanksOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/mel-computations.h#L17
|
||||
.. _kaldi::MelBanksOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/mel-computations.h#L43
|
||||
"""
|
||||
|
8
doc/source/images/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
## File description
|
||||
|
||||
<https://shields.io/> is used to create the following files:
|
||||
|
||||
- ./os.svg
|
||||
- ./python_ge_3.6-blue.svg
|
||||
- ./cuda_ge_10.1-orange.svg
|
||||
- ./pytorch_ge_1.5.0-green.svg
|
@ -1 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="186" height="20" role="img" aria-label="cuda: 10.1 | 10.2 | 11.0 | 11.1"><title>cuda: 10.1 | 10.2 | 11.0 | 11.1</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="186" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="37" height="20" fill="#555"/><rect x="37" width="149" height="20" fill="#fe7d37"/><rect width="186" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="195" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="270">cuda</text><text x="195" y="140" transform="scale(.1)" fill="#fff" textLength="270">cuda</text><text aria-hidden="true" x="1105" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="1390">10.1 | 10.2 | 11.0 | 11.1</text><text x="1105" y="140" transform="scale(.1)" fill="#fff" textLength="1390">10.1 | 10.2 | 11.0 | 11.1</text></g></svg>
|
Before Width: | Height: | Size: 1.2 KiB |
1
doc/source/images/cuda_ge_10.1-orange.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="94" height="20" role="img" aria-label="cuda: >= 10.1"><title>cuda: >= 10.1</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="94" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="37" height="20" fill="#555"/><rect x="37" width="57" height="20" fill="#fe7d37"/><rect width="94" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="195" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="270">cuda</text><text x="195" y="140" transform="scale(.1)" fill="#fff" textLength="270">cuda</text><text aria-hidden="true" x="645" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= 10.1</text><text x="645" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= 10.1</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
1
doc/source/images/os-green.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="176" height="20" role="img" aria-label="os: Linux | macOS | Windows"><title>os: Linux | macOS | Windows</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="176" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="153" height="20" fill="#97ca00"/><rect width="176" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">os</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">os</text><text aria-hidden="true" x="985" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="1430">Linux | macOS | Windows</text><text x="985" y="140" transform="scale(.1)" fill="#fff" textLength="1430">Linux | macOS | Windows</text></g></svg>
|
After Width: | Height: | Size: 1.2 KiB |
@ -1 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="118" height="20" role="img" aria-label="OS: Linux | macOS"><title>OS: Linux | macOS</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="118" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="27" height="20" fill="#555"/><rect x="27" width="91" height="20" fill="#4c1"/><rect width="118" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="145" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="170">OS</text><text x="145" y="140" transform="scale(.1)" fill="#fff" textLength="170">OS</text><text aria-hidden="true" x="715" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="810">Linux | macOS</text><text x="715" y="140" transform="scale(.1)" fill="#fff" textLength="810">Linux | macOS</text></g></svg>
|
Before Width: | Height: | Size: 1.1 KiB |
@ -1 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="138" height="20" role="img" aria-label="python: 3.6 | 3.7 | 3.8"><title>python: 3.6 | 3.7 | 3.8</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="138" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="89" height="20" fill="#007ec6"/><rect width="138" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="925" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="790">3.6 | 3.7 | 3.8</text><text x="925" y="140" transform="scale(.1)" fill="#fff" textLength="790">3.6 | 3.7 | 3.8</text></g></svg>
|
Before Width: | Height: | Size: 1.2 KiB |
1
doc/source/images/python_ge_3.6-blue.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: >= 3.6"><title>python: >= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">>= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">>= 3.6</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
@ -1 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="298" height="20" role="img" aria-label="pytorch: 1.6.0 | 1.7.0 | 1.7.1 | 1.8.0 | 1.8.1 | 1.9.0"><title>pytorch: 1.6.0 | 1.7.0 | 1.7.1 | 1.8.0 | 1.8.1 | 1.9.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="298" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="51" height="20" fill="#555"/><rect x="51" width="247" height="20" fill="#97ca00"/><rect width="298" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="265" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="410">pytorch</text><text x="265" y="140" transform="scale(.1)" fill="#fff" textLength="410">pytorch</text><text aria-hidden="true" x="1735" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="2370">1.6.0 | 1.7.0 | 1.7.1 | 1.8.0 | 1.8.1 | 1.9.0</text><text x="1735" y="140" transform="scale(.1)" fill="#fff" textLength="2370">1.6.0 | 1.7.0 | 1.7.1 | 1.8.0 | 1.8.1 | 1.9.0</text></g></svg>
|
Before Width: | Height: | Size: 1.3 KiB |
1
doc/source/images/pytorch_ge_1.5.0-green.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="112" height="20" role="img" aria-label="pytorch: >= 1.5.0"><title>pytorch: >= 1.5.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="112" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="51" height="20" fill="#555"/><rect x="51" width="61" height="20" fill="#97ca00"/><rect width="112" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="265" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="410">pytorch</text><text x="265" y="140" transform="scale(.1)" fill="#fff" textLength="410">pytorch</text><text aria-hidden="true" x="805" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">>= 1.5.0</text><text x="805" y="140" transform="scale(.1)" fill="#fff" textLength="510">>= 1.5.0</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
@ -6,19 +6,11 @@
|
||||
kaldifeat
|
||||
=========
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ implements
|
||||
feature extraction algorithms **compatible** with kaldi using PyTorch, supporting CUDA
|
||||
as well as autograd.
|
||||
|
||||
Currently, only fbank features are supported.
|
||||
It can produce the same feature output as ``compute-fbank-feats`` (from kaldi)
|
||||
when given the same options.
|
||||
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
:caption: Contents
|
||||
|
||||
installation
|
||||
usage
|
||||
intro
|
||||
installation/index
|
||||
usage/index
|
||||
|
@ -1,54 +0,0 @@
|
||||
Installation
|
||||
============
|
||||
|
||||
.. _from source:
|
||||
|
||||
Install kaldifeat from source
|
||||
-----------------------------
|
||||
|
||||
You have to install ``cmake`` and ``PyTorch`` first.
|
||||
|
||||
- ``cmake`` 3.11 is known to work. Other CMake versions may also work.
|
||||
- ``PyTorch`` 1.8.1 is known to work. Other PyTorch versions may also work.
|
||||
- Python >= 3.6
|
||||
|
||||
|
||||
The commands to install ``kaldifeat`` from source are:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
git clone https://github.com/csukuangfj/kaldifeat
|
||||
cd kaldifeat
|
||||
python3 setup.py install
|
||||
|
||||
To test that you have installed ``kaldifeat`` successfully, please run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
|
||||
It should print the version, e.g., ``1.0``.
|
||||
|
||||
Install kaldifeat from PyPI
|
||||
---------------------------
|
||||
|
||||
The pre-built ``kaldifeat`` hosted on PyPI uses PyTorch 1.8.1.
|
||||
If you install ``kaldifeat`` using pip, it will replace your locally
|
||||
installed PyTorch automatically with PyTorch 1.8.1.
|
||||
|
||||
If you don't want this to happen, please `Install kaldifeat from source`_.
|
||||
|
||||
The command to install ``kaldifeat`` from PyPI is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install kaldifeat
|
||||
|
||||
|
||||
To test that you have installed ``kaldifeat`` successfully, please run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
|
||||
It should print the version, e.g., ``1.0``.
|
48
doc/source/installation/faq.rst
Normal file
@ -0,0 +1,48 @@
|
||||
FAQs
|
||||
====
|
||||
|
||||
How to install a CUDA version of kaldifeat from source
|
||||
------------------------------------------------------
|
||||
|
||||
You need to first install a CUDA version of `PyTorch`_ and then install `kaldifeat`_.
|
||||
|
||||
.. note::
|
||||
|
||||
You can use a CUDA version of `kaldifeat`_ on machines with no GPUs.
|
||||
|
||||
How to install a CPU version of kaldifeat from source
|
||||
-----------------------------------------------------
|
||||
|
||||
You need to first install a CPU version of `PyTorch`_ and then install `kaldifeat`_.
|
||||
|
||||
How to fix `Caffe2: Cannot find cuDNN library`
|
||||
----------------------------------------------
|
||||
|
||||
.. code-block::
|
||||
|
||||
Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN
|
||||
libraries. Please set the proper cuDNN prefixes and / or install cuDNN.
|
||||
|
||||
You will have such an error when you want to install a CUDA version of `kaldifeat`_
|
||||
by ``pip install kaldifeat`` or from source.
|
||||
|
||||
You need to first install cuDNN. Assume you have installed cuDNN to the
|
||||
path ``/path/to/cudnn``. You can fix the error by using ``one`` of the following
|
||||
commands.
|
||||
|
||||
(1) Fix for installation using ``pip install``
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
|
||||
pip install --verbose kaldifeat
|
||||
|
||||
(2) Fix for installation from source
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
mkdir /some/path
|
||||
git clone https://github.com/csukuangfj/kaldifeat.git
|
||||
cd kaldifeat
|
||||
export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
|
||||
python setup.py install
|
47
doc/source/installation/from_source.rst
Normal file
@ -0,0 +1,47 @@
|
||||
.. _from source:
|
||||
|
||||
Install kaldifeat from source
|
||||
=============================
|
||||
|
||||
You have to install ``cmake`` and `PyTorch`_ first.
|
||||
|
||||
- ``cmake`` 3.11 is known to work. Other CMake versions may also work.
|
||||
- `PyTorch`_ >= 1.5.0 is known to work. Other PyTorch versions may also work.
|
||||
- Python >= 3.6
|
||||
- A compiler that supports C++ 14
|
||||
|
||||
|
||||
The commands to install `kaldifeat`_ from source are:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
git clone https://github.com/csukuangfj/kaldifeat
|
||||
cd kaldifeat
|
||||
python3 setup.py install
|
||||
|
||||
To test that you have installed `kaldifeat`_ successfully, please run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
|
||||
It should print the version, e.g., ``1.0``.
|
||||
|
||||
.. _from PyPI:
|
||||
|
||||
Install kaldifeat from PyPI
|
||||
---------------------------
|
||||
|
||||
The command to install `kaldifeat`_ from PyPI is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install --verbose kaldifeat
|
||||
|
||||
To test that you have installed `kaldifeat`_ successfully, please run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
|
||||
It should print the version, e.g., ``1.0``.
|
139
doc/source/installation/from_wheels.rst
Normal file
@ -0,0 +1,139 @@
|
||||
From pre-compiled wheels (Recommended)
|
||||
=======================================
|
||||
|
||||
You can find pre-compiled wheels at
|
||||
|
||||
- CPU wheels: `<https://csukuangfj.github.io/kaldifeat/cpu.html>`_
|
||||
- CUDA wheels: `<https://csukuangfj.github.io/kaldifeat/cuda.html>`_
|
||||
|
||||
We give a few examples below to show you how to install `kaldifeat`_ from
|
||||
pre-compiled wheels.
|
||||
|
||||
.. hint::
|
||||
|
||||
The following lists only some examples. We suggest that you always select the
|
||||
latest version of ``kaldifeat``.
|
||||
|
||||
Linux (CPU)
|
||||
-----------
|
||||
|
||||
Suppose you want to install the following wheel:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
|
||||
you can use one of the following methods:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# method 1
|
||||
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
|
||||
pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
|
||||
|
||||
# method 2
|
||||
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
|
||||
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
|
||||
pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
|
||||
Windows (CPU)
|
||||
--------------
|
||||
|
||||
Suppose you want to install the following wheel:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
|
||||
|
||||
you can use one of the following methods:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# method 1
|
||||
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
|
||||
pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
|
||||
|
||||
# method 2
|
||||
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
|
||||
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
|
||||
|
||||
pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
|
||||
|
||||
macOS (CPU)
|
||||
-----------
|
||||
|
||||
Suppose you want to install the following wheel:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
|
||||
|
||||
you can use one of the following methods:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# method 1
|
||||
pip install torch==2.4.0
|
||||
pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
|
||||
|
||||
# method 2
|
||||
pip install torch==2.4.0 -f https://download.pytorch.org/whl/torch/
|
||||
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
|
||||
|
||||
pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
|
||||
|
||||
Linux (CUDA)
|
||||
------------
|
||||
|
||||
Suppose you want to install the following wheel:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
|
||||
you can use one of the following methods:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# method 1
|
||||
pip install torch==2.4.0+cu124 -f https://download.pytorch.org/whl/torch/
|
||||
pip install kaldifeat==1.25.4.dev20240725+cuda12.4.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cuda.html
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# pip install kaldifeat==1.25.4.dev20240725+cuda12.4.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cuda-cn.html
|
||||
|
||||
# method 2
|
||||
pip install torch==2.4.0+cu124 -f https://download.pytorch.org/whl/torch/
|
||||
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
|
||||
# For users from China
|
||||
# 中国国内用户,如果访问不了 huggingface, 请使用
|
||||
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
||||
|
||||
pip install ./kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
11
doc/source/installation/index.rst
Normal file
@ -0,0 +1,11 @@
|
||||
Installation
|
||||
============
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
|
||||
./from_wheels.rst
|
||||
./from_source.rst
|
||||
./faq.rst
|
||||
|
||||
|
103
doc/source/intro.rst
Normal file
@ -0,0 +1,103 @@
|
||||
Introduction
|
||||
============
|
||||
|
||||
`kaldifeat`_ implements
|
||||
speech feature extraction algorithms **compatible** with `Kaldi`_ using `PyTorch`_,
|
||||
supporting CUDA as well as autograd.
|
||||
|
||||
`kaldifeat`_ has the following features:
|
||||
|
||||
- Fully compatible with `Kaldi`_
|
||||
|
||||
.. note::
|
||||
|
||||
The underlying C++ code is copied & modified from `Kaldi`_ directly.
|
||||
It is rewritten with `PyTorch`_ C++ APIs.
|
||||
|
||||
- Provide not only ``C++ APIs`` but also ``Python APIs``
|
||||
|
||||
.. note::
|
||||
|
||||
You can access `kaldifeat`_ from ``Python``.
|
||||
|
||||
- Support autograd
|
||||
- Support ``CUDA`` and ``CPU``
|
||||
|
||||
.. note::
|
||||
|
||||
You can use CUDA for feature extraction.
|
||||
|
||||
- Support ``online`` (i.e., ``streaming``) and ``offline`` (i.e., ``non-streaming``)
|
||||
feature extraction
|
||||
- Support chunk-based processing
|
||||
|
||||
.. note::
|
||||
|
||||
This is especially useful if you want to process audios of several
|
||||
hours long, which may cause OOM if you send them for computation at once.
|
||||
With chunk-based processing, you can process audios of arbitrary length.
|
||||
|
||||
- Support batch processing
|
||||
|
||||
.. note::
|
||||
|
||||
With `kaldifeat`_ you can extract features for a batch of audios.
|
||||
|
||||
|
||||
.. see https://sublime-and-sphinx-guide.readthedocs.io/en/latest/tables.html
|
||||
|
||||
Currently implemented speech features and their counterparts in `Kaldi`_ are
|
||||
listed in the following table.
|
||||
|
||||
.. list-table:: Supported speech features
|
||||
:widths: 50 50
|
||||
:header-rows: 1
|
||||
|
||||
* - Supported speech features
|
||||
- Counterpart in `Kaldi`_
|
||||
* - `kaldifeat.Fbank`_
|
||||
- `compute-fbank-feats`_
|
||||
* - `kaldifeat.Mfcc`_
|
||||
- `compute-mfcc-feats`_
|
||||
* - `kaldifeat.Plp`_
|
||||
- `compute-plp-feats`_
|
||||
* - `kaldifeat.Spectrogram`_
|
||||
- `compute-spectrogram-feats`_
|
||||
* - `kaldifeat.OnlineFbank`_
|
||||
- `kaldi::OnlineFbank`_
|
||||
* - `kaldifeat.OnlineMfcc`_
|
||||
- `kaldi::OnlineMfcc`_
|
||||
* - `kaldifeat.OnlinePlp`_
|
||||
- `kaldi::OnlinePlp`_
|
||||
|
||||
Each feature computer needs an option. The following table lists the options
|
||||
for each computer and the corresponding options in `Kaldi`_.
|
||||
|
||||
.. hint::
|
||||
|
||||
Note that we reuse the parameter names from `Kaldi`_.
|
||||
|
||||
Also, both online feature computers and offline feature computers share the
|
||||
same option.
|
||||
|
||||
.. list-table:: Feature computer options
|
||||
:widths: 50 50
|
||||
:header-rows: 1
|
||||
|
||||
* - Options in `kaldifeat`_
|
||||
- Corresponding options in `Kaldi`_
|
||||
* - `kaldifeat.FbankOptions`_
|
||||
- `kaldi::FbankOptions`_
|
||||
* - `kaldifeat.MfccOptions`_
|
||||
- `kaldi::MfccOptions`_
|
||||
* - `kaldifeat.PlpOptions`_
|
||||
- `kaldi::PlpOptions`_
|
||||
* - `kaldifeat.SpectrogramOptions`_
|
||||
- `kaldi::SpectrogramOptions`_
|
||||
* - `kaldifeat.FrameExtractionOptions`_
|
||||
- `kaldi::FrameExtractionOptions`_
|
||||
* - `kaldifeat.MelBanksOptions`_
|
||||
- `kaldi::MelBanksOptions`_
|
||||
|
||||
Read more to learn how to install `kaldifeat`_ and how to use each feature
|
||||
computer.
|
@ -1,212 +0,0 @@
|
||||
Usage
|
||||
=====
|
||||
|
||||
Let us first see the help message of kaldi's ``compute-fbank-feats``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ compute-fbank-feats
|
||||
|
||||
Create Mel-filter bank (FBANK) feature files.
|
||||
Usage: compute-fbank-feats [options...] <wav-rspecifier> <feats-wspecifier>
|
||||
|
||||
Options:
|
||||
--allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
|
||||
--allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
|
||||
--blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
|
||||
--channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
|
||||
--debug-mel : Print out debugging information for mel bin computation (bool, default = false)
|
||||
--dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
|
||||
--energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
|
||||
--frame-length : Frame length in milliseconds (float, default = 25)
|
||||
--frame-shift : Frame shift in milliseconds (float, default = 10)
|
||||
--high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
|
||||
--htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
|
||||
--low-freq : Low cutoff frequency for mel bins (float, default = 20)
|
||||
--max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
|
||||
--min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
|
||||
--num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
|
||||
--output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
|
||||
--preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
|
||||
--raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
|
||||
--remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
|
||||
--round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
|
||||
--sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
|
||||
--snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
|
||||
--subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
|
||||
--use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
|
||||
--use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
|
||||
--use-power : If true, use power, else use magnitude. (bool, default = true)
|
||||
--utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
|
||||
--vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
|
||||
--vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
|
||||
--vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
|
||||
--vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
|
||||
--window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
|
||||
--write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
|
||||
|
||||
Standard options:
|
||||
--config : Configuration file to read (this option may be repeated) (string, default = "")
|
||||
--help : Print out usage message (bool, default = false)
|
||||
--print-args : Print the command line arguments (to stderr) (bool, default = true)
|
||||
--verbose : Verbose level (higher->more logging) (int, default = 0)
|
||||
|
||||
FbankOptions
|
||||
------------
|
||||
|
||||
``kaldifeat`` reuses the same options from kaldi's ``compute-fbank-feats``.
|
||||
|
||||
The following shows the default values of ``kaldifeat.FbankOptions``:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> import kaldifeat
|
||||
>>> fbank_opts = kaldifeat.FbankOptions()
|
||||
>>> print(fbank_opts)
|
||||
frame_opts:
|
||||
samp_freq: 16000
|
||||
frame_shift_ms: 10
|
||||
frame_length_ms: 25
|
||||
dither: 1
|
||||
preemph_coeff: 0.97
|
||||
remove_dc_offset: 1
|
||||
window_type: povey
|
||||
round_to_power_of_two: 1
|
||||
blackman_coeff: 0.42
|
||||
snip_edges: 1
|
||||
|
||||
|
||||
mel_opts:
|
||||
num_bins: 23
|
||||
low_freq: 20
|
||||
high_freq: 0
|
||||
vtln_low: 100
|
||||
vtln_high: -500
|
||||
debug_mel: 0
|
||||
htk_mode: 0
|
||||
|
||||
use_energy: 0
|
||||
energy_floor: 0
|
||||
raw_energy: 1
|
||||
htk_compat: 0
|
||||
use_log_fbank: 1
|
||||
use_power: 1
|
||||
device: cpu
|
||||
|
||||
It consists of three parts:
|
||||
|
||||
- ``frame_opts``
|
||||
|
||||
Options in this part are accessed by ``frame_opts.xxx``. That is, to access
|
||||
the sample rate, you use:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> fbank_opts = kaldifeat.FbankOptions()
|
||||
>>> print(fbank_opts.frame_opts.samp_freq)
|
||||
16000.0
|
||||
|
||||
- ``mel_opts``
|
||||
|
||||
Options in this part are accessed by ``mel_opts.xxx``. That is, to access
|
||||
the number of mel bins, you use:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> fbank_opts = kaldifeat.FbankOptions()
|
||||
>>> print(fbank_opts.mel_opts.num_bins)
|
||||
23
|
||||
|
||||
- fbank related
|
||||
|
||||
Options in this part are accessed directly. That is, to access the device
|
||||
field, you use:
|
||||
|
||||
.. code-block::
|
||||
|
||||
>>> print(fbank_opts.device)
|
||||
cpu
|
||||
>>> fbank_opts.device = 'cuda:0'
|
||||
>>> print(fbank_opts.device)
|
||||
cuda:0
|
||||
>>> import torch
|
||||
>>> fbank_opts.device = torch.device('cuda', 0)
|
||||
>>> print(fbank_opts.device)
|
||||
cuda:0
|
||||
|
||||
|
||||
|
||||
To change the sample rate to 8000, you can use:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> fbank_opts = kaldifeat.FbankOptions()
|
||||
>>> print(fbank_opts.frame_opts.samp_freq)
|
||||
16000.0
|
||||
>>> fbank_opts.frame_opts.samp_freq = 8000
|
||||
>>> print(fbank_opts.frame_opts.samp_freq)
|
||||
8000.0
|
||||
|
||||
To change ``snip_edges`` to ``False``, you can use:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> fbank_opts.frame_opts.snip_edges = False
|
||||
>>> print(fbank_opts.frame_opts.snip_edges)
|
||||
False
|
||||
|
||||
To change the number of mel bins to 80, you can use:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
>>> print(fbank_opts.mel_opts.num_bins)
|
||||
23
|
||||
>>> fbank_opts.mel_opts.num_bins = 80
|
||||
>>> print(fbank_opts.mel_opts.num_bins)
|
||||
80
|
||||
|
||||
To change the device to ``cuda``, you can use:
|
||||
|
||||
|
||||
Fbank
|
||||
-----
|
||||
|
||||
The following shows how to use ``kaldifeat.Fbank`` to compute
|
||||
the fbank features of sound files.
|
||||
|
||||
First, let us generate two sound files using ``sox``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# generate a wav of 1.2 seconds, containing a sine-wave
|
||||
# swept from 300 Hz to 3300 Hz
|
||||
sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
|
||||
|
||||
# another sound file with 0.5 seconds
|
||||
sox -n -r 16000 -b 16 test2.wav synth 0.5 sine 300-3300
|
||||
|
||||
.. hint::
|
||||
|
||||
You can find the above two files by visiting the following two links:
|
||||
|
||||
- `test.wav <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_data/test.wav>`_
|
||||
- `test2.wav <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_data/test2.wav>`_
|
||||
|
||||
The `following code <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank.py>`_
|
||||
shows the usage of ``kaldifeat.Fbank``.
|
||||
|
||||
It shows:
|
||||
|
||||
- How to read a sound file. Note that audio samples are scaled to the range [-32768, 32768].
|
||||
The intention is to produce the same output as kaldi. You don't need to scale it if
|
||||
you don't care about the compatibility with kaldi
|
||||
|
||||
- ``kaldifeat.Fbank`` supports CUDA as well as CPU
|
||||
|
||||
- ``kaldifeat.Fbank`` supports processing sound files in a batch as well as accepting
|
||||
a single sound file
|
||||
|
||||
|
||||
.. literalinclude:: ./code/test_fbank.py
|
||||
:caption: Demo of ``kaldifeat.Fbank``
|
||||
:language: python
|
46
doc/source/usage/code/compute-fbank-feats-help.txt
Normal file
@ -0,0 +1,46 @@
|
||||
compute-fbank-feats
|
||||
|
||||
Create Mel-filter bank (FBANK) feature files.
|
||||
Usage: compute-fbank-feats [options...] <wav-rspecifier> <feats-wspecifier>
|
||||
|
||||
Options:
|
||||
--allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
|
||||
--allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
|
||||
--blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
|
||||
--channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
|
||||
--debug-mel : Print out debugging information for mel bin computation (bool, default = false)
|
||||
--dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
|
||||
--energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
|
||||
--frame-length : Frame length in milliseconds (float, default = 25)
|
||||
--frame-shift : Frame shift in milliseconds (float, default = 10)
|
||||
--high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
|
||||
--htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
|
||||
--low-freq : Low cutoff frequency for mel bins (float, default = 20)
|
||||
--max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
|
||||
--min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
|
||||
--num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
|
||||
--output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
|
||||
--preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
|
||||
--raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
|
||||
--remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
|
||||
--round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
|
||||
--sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
|
||||
--snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
|
||||
--subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
|
||||
--use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
|
||||
--use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
|
||||
--use-power : If true, use power, else use magnitude. (bool, default = true)
|
||||
--utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
|
||||
--vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
|
||||
--vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
|
||||
--vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
|
||||
--vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
|
||||
--window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
|
||||
--write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
|
||||
|
||||
Standard options:
|
||||
--config : Configuration file to read (this option may be repeated) (string, default = "")
|
||||
--help : Print out usage message (bool, default = false)
|
||||
--print-args : Print the command line arguments (to stderr) (bool, default = true)
|
||||
--verbose : Verbose level (higher->more logging) (int, default = 0)
|
||||
|
65
doc/source/usage/code/fbank_options-1.txt
Normal file
@ -0,0 +1,65 @@
|
||||
$ python3
|
||||
Python 3.8.0 (default, Oct 28 2019, 16:14:01)
|
||||
[GCC 8.3.0] on linux
|
||||
Type "help", "copyright", "credits" or "license" for more information.
|
||||
>>> import kaldifeat
|
||||
>>> opts = kaldifeat.FbankOptions()
|
||||
>>> print(opts)
|
||||
frame_opts:
|
||||
samp_freq: 16000
|
||||
frame_shift_ms: 10
|
||||
frame_length_ms: 25
|
||||
dither: 1
|
||||
preemph_coeff: 0.97
|
||||
remove_dc_offset: 1
|
||||
window_type: povey
|
||||
round_to_power_of_two: 1
|
||||
blackman_coeff: 0.42
|
||||
snip_edges: 1
|
||||
max_feature_vectors: -1
|
||||
|
||||
|
||||
mel_opts:
|
||||
num_bins: 23
|
||||
low_freq: 20
|
||||
high_freq: 0
|
||||
vtln_low: 100
|
||||
vtln_high: -500
|
||||
debug_mel: 0
|
||||
htk_mode: 0
|
||||
|
||||
use_energy: 0
|
||||
energy_floor: 0
|
||||
raw_energy: 1
|
||||
htk_compat: 0
|
||||
use_log_fbank: 1
|
||||
use_power: 1
|
||||
device: cpu
|
||||
|
||||
>>> print(opts.dither)
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
AttributeError: '_kaldifeat.FbankOptions' object has no attribute 'dither'
|
||||
>>>
|
||||
>>> print(opts.frame_opts.dither)
|
||||
1.0
|
||||
>>> opts.frame_opts.dither = 0 # disable dither
|
||||
>>> print(opts.frame_opts.dither)
|
||||
0.0
|
||||
>>> import torch
|
||||
>>> print(opts.device)
|
||||
cpu
|
||||
>>> opts.device = 'cuda:0'
|
||||
>>> print(opts.device)
|
||||
cuda:0
|
||||
>>> opts.device = torch.device('cuda', 1)
|
||||
>>> print(opts.device)
|
||||
cuda:1
|
||||
>>> opts.device = 'cpu'
|
||||
>>> print(opts.device)
|
||||
cpu
|
||||
>>> print(opts.mel_opts.num_bins)
|
||||
23
|
||||
>>> opts.mel_opts.num_bins = 80
|
||||
>>> print(opts.mel_opts.num_bins)
|
||||
80
|
1
doc/source/usage/code/test_fbank_options.py
Symbolic link
@ -0,0 +1 @@
|
||||
../../../../kaldifeat/python/tests/test_fbank_options.py
|
3
doc/source/usage/fbank.rst
Normal file
@ -0,0 +1,3 @@
|
||||
kaldifeat.Fbank
|
||||
===============
|
||||
|
51
doc/source/usage/fbank_options.rst
Normal file
@ -0,0 +1,51 @@
|
||||
kaldifeat.FbankOptions
|
||||
======================
|
||||
|
||||
If you want to construct an instance of `kaldifeat.Fbank`_ or
|
||||
`kaldifeat.OnlineFbank`_, you have to provide an instance of
|
||||
`kaldifeat.FbankOptions`_.
|
||||
|
||||
The following code shows how to construct an instance of `kaldifeat.FbankOptions`_.
|
||||
|
||||
.. literalinclude:: ./code/fbank_options-1.txt
|
||||
:caption: Usage of `kaldifeat.FbankOptions`_
|
||||
:emphasize-lines: 6,8,22,37
|
||||
|
||||
Note that we reuse the same option names as `compute-fbank-feats`_ from `Kaldi`_:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ compute-fbank-feats --help
|
||||
|
||||
|
||||
.. literalinclude:: ./code/compute-fbank-feats-help.txt
|
||||
:caption: Output of ``compute-fbank-feats --help``
|
||||
|
||||
Please refer to the output of ``compute-fbank-feats --help`` for the meaning
|
||||
of each field of `kaldifeat.FbankOptions`_.
|
||||
|
||||
One thing worth noting is that `kaldifeat.FbankOptions`_ has a field ``device``,
|
||||
which is an instance of ``torch.device``. You can assign it either a string, e.g.,
|
||||
``"cpu"`` or ``"cuda:0"``, or an instance of ``torch.device``, e.g., ``torch.device("cpu")`` or
|
||||
``torch.device("cuda", 1)``.
|
||||
|
||||
.. hint::
|
||||
|
||||
You can use this field to control whether the feature computer
|
||||
constructed from it performs computation on CPU or CUDA.
|
||||
|
||||
.. caution::
|
||||
|
||||
If you use a CUDA device, make sure that you have installed a CUDA version
|
||||
of `PyTorch`_.
|
||||
|
||||
Example usage
|
||||
-------------
|
||||
|
||||
The following code from
|
||||
`<https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank_options.py>`_
|
||||
demonstrates the usage of `kaldifeat.FbankOptions`_:
|
||||
|
||||
.. literalinclude:: ./code/test_fbank_options.py
|
||||
:caption: Example usage of `kaldifeat.FbankOptions`_
|
||||
:language: python
|
11
doc/source/usage/index.rst
Normal file
@ -0,0 +1,11 @@
|
||||
Usage
|
||||
=====
|
||||
|
||||
This section describes how to use feature computers in `kaldifeat`_.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
fbank_options
|
||||
fbank
|
||||
online_fbank
|
3
doc/source/usage/online_fbank.rst
Normal file
@ -0,0 +1,3 @@
|
||||
kaldifeat.OnlineFbank
|
||||
=====================
|
||||
|
106
get_version.py
Executable file
@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
def is_macos():
    """Return True when ``platform.system()`` reports ``"Darwin"``."""
    system_name = platform.system()
    return system_name == "Darwin"
|
||||
|
||||
|
||||
def is_windows():
    """Return True when ``platform.system()`` reports ``"Windows"``."""
    system_name = platform.system()
    return system_name == "Windows"
|
||||
|
||||
|
||||
def with_cuda():
    """Decide whether the package version should be tagged as a CUDA build.

    Returns True only when an ``nvcc`` executable can be located via
    ``shutil.which`` and the host is not macOS; otherwise returns False.
    """
    nvcc_path = shutil.which("nvcc")
    return nvcc_path is not None and not is_macos()
|
||||
|
||||
|
||||
def get_pytorch_version():
    """Return ``torch.__version__`` with any ``+suffix`` removed.

    For example, ``1.7.1+cuda101`` becomes ``1.7.1``.
    """
    base_version, _, _ = torch.__version__.partition("+")
    return base_version
|
||||
|
||||
|
||||
def get_cuda_version():
    """Return the CUDA version PyTorch was built with (``torch.version.cuda``).

    When both the build-time version and the running CUDA version reported
    by ``torch.utils.collect_env`` are available, the build-time version
    must appear inside the running version string; otherwise an
    ``AssertionError`` describing both versions is raised.
    """
    from torch.utils import collect_env

    running = collect_env.get_running_cuda_version(collect_env.run)
    built_with = torch.version.cuda

    both_known = running is not None and built_with is not None
    if both_known:
        assert built_with in running, (
            f"PyTorch is built with CUDA version: {built_with}.\n"
            f"The current running CUDA version is: {running}"
        )
    return built_with
|
||||
|
||||
|
||||
def is_for_pypi():
    """Return True when the ``KALDIFEAT_IS_FOR_PYPI`` environment variable is set.

    Only the presence of the variable matters; its value is ignored.
    """
    return "KALDIFEAT_IS_FOR_PYPI" in os.environ
|
||||
|
||||
|
||||
def is_stable():
    """Return True when the ``KALDIFEAT_IS_STABLE`` environment variable is set.

    Only the presence of the variable matters; its value is ignored.
    """
    return "KALDIFEAT_IS_STABLE" in os.environ
|
||||
|
||||
|
||||
def is_for_conda():
    """Return True when the ``KALDIFEAT_IS_FOR_CONDA`` environment variable is set.

    Only the presence of the variable matters; its value is ignored.
    """
    return "KALDIFEAT_IS_FOR_CONDA" in os.environ
|
||||
|
||||
|
||||
def get_package_version():
    """Build the package version string for kaldifeat.

    The base version is read from the ``set(kaldifeat_VERSION ...)`` line of
    ``CMakeLists.txt``, opened relative to the current working directory.

    Unless ``is_stable()`` is true, a ``.devYYYYMMDD`` suffix (UTC date) and a
    local version tag are appended. The local tag is:

    - ``+cuda<ver>.torch<ver>`` when ``with_cuda()`` is true, or empty when
      building for PyPI with the default CUDA version;
    - ``+cpu.torch<ver>`` otherwise;
    - always empty when building for conda, or for PyPI on macOS.

    Returns:
      The version string, e.g. ``<base>.dev20240101+cpu.torch2.1.0`` for a
      development build or just ``<base>`` for a stable build.

    Raises:
      RuntimeError: if ``CMakeLists.txt`` contains no
        ``set(kaldifeat_VERSION ...)`` line.
    """
    # Set a default CUDA version here so that `pip install kaldifeat`
    # uses the default CUDA version.
    default_cuda_version = "10.1"  # CUDA 10.1

    if with_cuda():
        cuda_version = get_cuda_version()
        if is_for_pypi() and default_cuda_version == cuda_version:
            local_version = ""
        else:
            pytorch_version = get_pytorch_version()
            local_version = f"+cuda{cuda_version}.torch{pytorch_version}"
    else:
        pytorch_version = get_pytorch_version()
        local_version = f"+cpu.torch{pytorch_version}"

    if is_for_conda():
        local_version = ""

    if is_for_pypi() and is_macos():
        local_version = ""

    with open("CMakeLists.txt") as f:
        content = f.read()

    match = re.search(r"set\(kaldifeat_VERSION (.*)\)", content)
    if match is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise RuntimeError(
            "Cannot find 'set(kaldifeat_VERSION ...)' in CMakeLists.txt"
        )
    latest_version = match.group(1).strip('"')

    if is_stable():
        return f"{latest_version}"

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
    # datetime yields the same year/month/day fields.
    dt = datetime.datetime.now(datetime.timezone.utc)
    return f"{latest_version}.dev{dt.year}{dt.month:02d}{dt.day:02d}{local_version}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When run as a script, print the computed version string to stdout.
    version_string = get_package_version()
    print(version_string)
|
@ -1,2 +1,4 @@
|
||||
add_subdirectory(csrc)
|
||||
add_subdirectory(python)
|
||||
if(kaldifeat_BUILD_PYMODULE)
|
||||
add_subdirectory(python)
|
||||
endif()
|
||||
|
@ -9,14 +9,26 @@ set(kaldifeat_srcs
|
||||
feature-window.cc
|
||||
matrix-functions.cc
|
||||
mel-computations.cc
|
||||
online-feature.cc
|
||||
whisper-fbank.cc
|
||||
)
|
||||
|
||||
add_library(kaldifeat_core SHARED ${kaldifeat_srcs})
|
||||
add_library(kaldifeat_core ${kaldifeat_srcs})
|
||||
target_link_libraries(kaldifeat_core PUBLIC ${TORCH_LIBRARIES})
|
||||
|
||||
target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MAJOR=${KALDIFEAT_TORCH_VERSION_MAJOR})
|
||||
target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MINOR=${KALDIFEAT_TORCH_VERSION_MINOR})
|
||||
|
||||
if(APPLE)
|
||||
execute_process(
|
||||
COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
|
||||
)
|
||||
message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
|
||||
target_link_libraries(kaldifeat_core PUBLIC "-L ${PYTHON_SITE_PACKAGE_DIR}/../..")
|
||||
endif()
|
||||
|
||||
add_executable(test_kaldifeat test_kaldifeat.cc)
|
||||
target_link_libraries(test_kaldifeat PRIVATE kaldifeat_core)
|
||||
|
||||
@ -30,19 +42,52 @@ function(kaldifeat_add_test source)
|
||||
gtest_main
|
||||
)
|
||||
|
||||
# NOTE: We set the working directory here so that
|
||||
# it works also on windows. The reason is that
|
||||
# the required DLLs are inside ${TORCH_DIR}/lib
|
||||
# and they can be found by the exe if the current
|
||||
# working directory is ${TORCH_DIR}\lib
|
||||
add_test(NAME "Test.${name}"
|
||||
COMMAND
|
||||
$<TARGET_FILE:${name}>
|
||||
WORKING_DIRECTORY ${TORCH_DIR}/lib
|
||||
)
|
||||
endfunction()
|
||||
|
||||
if(BUILD_TESTS)
|
||||
if(kaldifeat_BUILD_TESTS)
|
||||
# please sort the source files alphabetically
|
||||
set(test_srcs
|
||||
feature-window-test.cc
|
||||
online-feature-test.cc
|
||||
)
|
||||
|
||||
foreach(source IN LISTS test_srcs)
|
||||
kaldifeat_add_test(${source})
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
file(MAKE_DIRECTORY
|
||||
DESTINATION
|
||||
${PROJECT_BINARY_DIR}/include/kaldifeat/csrc
|
||||
)
|
||||
|
||||
file(GLOB_RECURSE all_headers *.h)
|
||||
message(STATUS "All headers: ${all_headers}")
|
||||
|
||||
file(COPY
|
||||
${all_headers}
|
||||
DESTINATION
|
||||
${PROJECT_BINARY_DIR}/include/kaldifeat/csrc
|
||||
)
|
||||
if(BUILD_SHARED_LIBS AND WIN32)
|
||||
install(TARGETS kaldifeat_core
|
||||
DESTINATION ../
|
||||
)
|
||||
endif()
|
||||
install(TARGETS kaldifeat_core
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
)
|
||||
|
||||
install(FILES ${all_headers}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/kaldifeat/csrc
|
||||
)
|
||||
|
1
kaldifeat/csrc/CPPLINT.cfg
Normal file
@ -0,0 +1 @@
|
||||
exclude_files=whisper-mel-bank.h,whisper-v3-mel-bank.h
|
@ -55,10 +55,17 @@ torch::Tensor OfflineFeatureTpl<F>::ComputeFeatures(const torch::Tensor &wave,
|
||||
int32_t padding = frame_opts.PaddedWindowSize() - strided_input.size(1);
|
||||
|
||||
if (padding > 0) {
|
||||
#ifdef __ANDROID__
|
||||
auto padding_value = torch::zeros(
|
||||
{strided_input.size(0), padding},
|
||||
torch::dtype(torch::kFloat).device(strided_input.device()));
|
||||
strided_input = torch::cat({strided_input, padding_value}, 1);
|
||||
#else
|
||||
strided_input = torch::nn::functional::pad(
|
||||
strided_input, torch::nn::functional::PadFuncOptions({0, padding})
|
||||
.mode(torch::kConstant)
|
||||
.value(0));
|
||||
#endif
|
||||
}
|
||||
|
||||
return computer_.Compute(log_energy_pre_window, vtln_warp, strided_input);
|
||||
|
@ -62,6 +62,10 @@ class OfflineFeatureTpl {
|
||||
int32_t Dim() const { return computer_.Dim(); }
|
||||
const Options &GetOptions() const { return computer_.GetOptions(); }
|
||||
|
||||
const FrameExtractionOptions &GetFrameOptions() const {
|
||||
return GetOptions().frame_opts;
|
||||
}
|
||||
|
||||
// Copy constructor.
|
||||
OfflineFeatureTpl(const OfflineFeatureTpl<F> &) = delete;
|
||||
OfflineFeatureTpl<F> &operator=(const OfflineFeatureTpl<F> &) = delete;
|
||||
|
@ -8,8 +8,6 @@
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "torch/torch.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
|
||||
@ -67,7 +65,7 @@ torch::Tensor FbankComputer::Compute(torch::Tensor signal_raw_log_energy,
|
||||
// note spectrum is in magnitude, not power, because of `abs()`
|
||||
#if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
|
||||
// signal_frame shape: [x, 512]
|
||||
// spectrum shape [x, 257
|
||||
// spectrum shape [x, 257]
|
||||
torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
|
||||
#else
|
||||
// signal_frame shape [x, 512]
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include "kaldifeat/csrc/feature-common.h"
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/csrc/mel-computations.h"
|
||||
#include "torch/torch.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
@ -45,20 +44,18 @@ struct FbankOptions {
|
||||
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
os << "frame_opts: \n";
|
||||
os << frame_opts << "\n";
|
||||
os << "\n";
|
||||
os << "FbankOptions(";
|
||||
|
||||
os << "mel_opts: \n";
|
||||
os << mel_opts << "\n";
|
||||
os << "frame_opts=" << frame_opts.ToString() << ", ";
|
||||
os << "mel_opts=" << mel_opts.ToString() << ", ";
|
||||
|
||||
os << "use_energy: " << use_energy << "\n";
|
||||
os << "energy_floor: " << energy_floor << "\n";
|
||||
os << "raw_energy: " << raw_energy << "\n";
|
||||
os << "htk_compat: " << htk_compat << "\n";
|
||||
os << "use_log_fbank: " << use_log_fbank << "\n";
|
||||
os << "use_power: " << use_power << "\n";
|
||||
os << "device: " << device << "\n";
|
||||
os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
|
||||
os << "energy_floor=" << energy_floor << ", ";
|
||||
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
|
||||
os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
|
||||
os << "use_log_fbank=" << (use_log_fbank ? "True" : "False") << ", ";
|
||||
os << "use_power=" << (use_power ? "True" : "False") << ", ";
|
||||
os << "device=\"" << device << "\")";
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
|
@ -7,7 +7,7 @@
|
||||
#ifndef KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_
|
||||
#define KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_
|
||||
|
||||
#include "torch/torch.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include "kaldifeat/csrc/feature-common.h"
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/csrc/mel-computations.h"
|
||||
#include "torch/torch.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
@ -53,20 +53,18 @@ struct MfccOptions {
|
||||
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
os << "frame_opts: \n";
|
||||
os << frame_opts << "\n";
|
||||
os << "\n";
|
||||
os << "MfccOptions(";
|
||||
os << "frame_opts=" << frame_opts.ToString() << ", ";
|
||||
os << "mel_opts=" << mel_opts.ToString() << ", ";
|
||||
|
||||
os << "mel_opts: \n";
|
||||
os << mel_opts << "\n";
|
||||
os << "num_ceps=" << num_ceps << ", ";
|
||||
os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
|
||||
os << "energy_floor=" << energy_floor << ", ";
|
||||
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
|
||||
os << "cepstral_lifter=" << cepstral_lifter << ", ";
|
||||
os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
|
||||
os << "device=\"" << device << "\")";
|
||||
|
||||
os << "num_ceps: " << num_ceps << "\n";
|
||||
os << "use_energy: " << use_energy << "\n";
|
||||
os << "energy_floor: " << energy_floor << "\n";
|
||||
os << "raw_energy: " << raw_energy << "\n";
|
||||
os << "cepstral_lifter: " << cepstral_lifter << "\n";
|
||||
os << "htk_compat: " << htk_compat << "\n";
|
||||
os << "device: " << device << "\n";
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include "kaldifeat/csrc/feature-plp.h"
|
||||
|
||||
#include "kaldifeat/csrc/feature-functions.h"
|
||||
#include "torch/torch.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include "kaldifeat/csrc/feature-common.h"
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/csrc/mel-computations.h"
|
||||
#include "torch/torch.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
@ -61,23 +61,21 @@ struct PlpOptions {
|
||||
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
os << "frame_opts: \n";
|
||||
os << frame_opts << "\n";
|
||||
os << "\n";
|
||||
os << "PlpOptions(";
|
||||
|
||||
os << "mel_opts: \n";
|
||||
os << mel_opts << "\n";
|
||||
os << "frame_opts=" << frame_opts.ToString() << ", ";
|
||||
os << "mel_opts=" << mel_opts.ToString() << ", ";
|
||||
|
||||
os << "lpc_order: " << lpc_order << "\n";
|
||||
os << "num_ceps: " << num_ceps << "\n";
|
||||
os << "use_energy: " << use_energy << "\n";
|
||||
os << "energy_floor: " << energy_floor << "\n";
|
||||
os << "raw_energy: " << raw_energy << "\n";
|
||||
os << "compress_factor: " << compress_factor << "\n";
|
||||
os << "cepstral_lifter: " << cepstral_lifter << "\n";
|
||||
os << "cepstral_scale: " << cepstral_scale << "\n";
|
||||
os << "htk_compat: " << htk_compat << "\n";
|
||||
os << "device: " << device << "\n";
|
||||
os << "lpc_order=" << lpc_order << ", ";
|
||||
os << "num_ceps=" << num_ceps << ", ";
|
||||
os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
|
||||
os << "energy_floor=" << energy_floor << ", ";
|
||||
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
|
||||
os << "compress_factor=" << compress_factor << ", ";
|
||||
os << "cepstral_lifter=" << cepstral_lifter << ", ";
|
||||
os << "cepstral_scale=" << cepstral_scale << ", ";
|
||||
os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
|
||||
os << "device=\"" << device << "\")";
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
#include "kaldifeat/csrc/feature-common.h"
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "torch/torch.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
@ -36,13 +36,12 @@ struct SpectrogramOptions {
|
||||
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
os << "frame_opts: \n";
|
||||
os << frame_opts << "\n";
|
||||
|
||||
os << "energy_floor: " << energy_floor << "\n";
|
||||
os << "raw_energy: " << raw_energy << "\n";
|
||||
// os << "return_raw_fft: " << return_raw_fft << "\n";
|
||||
os << "device: " << device << "\n";
|
||||
os << "SpectrogramOptions(";
|
||||
os << "frame_opts=" << frame_opts.ToString() << ", ";
|
||||
os << "energy_floor=" << energy_floor << ", ";
|
||||
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
|
||||
os << "return_raw_fft=" << (return_raw_fft ? "True" : "False") << ", ";
|
||||
os << "device=\"" << device << "\")";
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
|
@ -9,8 +9,6 @@
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "torch/torch.h"
|
||||
|
||||
#ifndef M_2PI
|
||||
#define M_2PI 6.283185307179586476925286766559005
|
||||
#endif
|
||||
@ -31,6 +29,13 @@ FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts,
|
||||
float *window_data = window.data_ptr<float>();
|
||||
|
||||
double a = M_2PI / (frame_length - 1);
|
||||
|
||||
if (opts.window_type == "hann") {
|
||||
// see https://pytorch.org/docs/stable/generated/torch.hann_window.html
|
||||
// We assume periodic is true
|
||||
a = M_2PI / frame_length;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < frame_length; i++) {
|
||||
double i_fl = static_cast<double>(i);
|
||||
if (opts.window_type == "hanning") {
|
||||
@ -41,6 +46,8 @@ FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts,
|
||||
window_data[i] = sin(0.5 * a * i_fl);
|
||||
} else if (opts.window_type == "hamming") {
|
||||
window_data[i] = 0.54 - 0.46 * cos(a * i_fl);
|
||||
} else if (opts.window_type == "hann") {
|
||||
window_data[i] = 0.50 - 0.50 * cos(a * i_fl);
|
||||
} else if (opts.window_type ==
|
||||
"povey") { // like hamming but goes to zero at edges.
|
||||
window_data[i] = pow(0.5 - 0.5 * cos(a * i_fl), 0.85);
|
||||
@ -163,19 +170,20 @@ torch::Tensor Dither(const torch::Tensor &wave, float dither_value) {
|
||||
#if 1
|
||||
return wave + rand_gauss * dither_value;
|
||||
#else
|
||||
// use in-place version of wave and change its to pointer type
|
||||
// use in-place version of wave and change it to pointer type
|
||||
wave_->add_(rand_gauss, dither_value);
|
||||
#endif
|
||||
}
|
||||
|
||||
torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave) {
|
||||
using namespace torch::indexing; // It imports: Slice, None // NOLINT
|
||||
if (preemph_coeff == 0.0f) return wave;
|
||||
|
||||
KALDIFEAT_ASSERT(preemph_coeff >= 0.0f && preemph_coeff <= 1.0f);
|
||||
|
||||
torch::Tensor ans = torch::empty_like(wave);
|
||||
|
||||
using torch::indexing::None;
|
||||
using torch::indexing::Slice;
|
||||
// right = wave[:, 1:]
|
||||
torch::Tensor right = wave.index({"...", Slice(1, None, None)});
|
||||
|
||||
@ -190,4 +198,59 @@ torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave) {
|
||||
return ans;
|
||||
}
|
||||
|
||||
// Extracts the raw samples of frame `f` from `wave`.
//
// `wave` is indexed as a 1-D float tensor; `sample_offset` is the number of
// samples preceding `wave` that the caller has already discarded. The
// result is a 1-D tensor with opts.WindowSize() elements. No dithering,
// pre-emphasis or windowing is applied here.
//
// When the frame lies completely inside `wave`, the result is obtained by
// slicing `wave`. Otherwise a new float tensor is filled sample by sample,
// mirroring out-of-range positions back into `wave`.
torch::Tensor ExtractWindow(int64_t sample_offset, const torch::Tensor &wave,
                            int32_t f, const FrameExtractionOptions &opts) {
  KALDIFEAT_ASSERT(sample_offset >= 0 && wave.numel() != 0);

  const int32_t window_size = opts.WindowSize();
  int64_t num_samples = sample_offset + wave.numel();
  int64_t start_sample = FirstSampleOfFrame(f, opts);
  int64_t end_sample = start_sample + window_size;

  if (opts.snip_edges) {
    KALDIFEAT_ASSERT(start_sample >= sample_offset &&
                     end_sample <= num_samples);
  } else {
    KALDIFEAT_ASSERT(sample_offset == 0 || start_sample >= sample_offset);
  }

  // [first, last) is the requested range expressed as indexes into `wave`.
  const int32_t first = static_cast<int32_t>(start_sample - sample_offset);
  const int32_t last = first + window_size;

  const bool fully_inside = (first >= 0) && (last <= wave.numel());
  if (fully_inside) {
    // Common case: equivalent to wave[first:last] in Python.
    return wave.index({torch::indexing::Slice(first, last)});
  }

  // Edge case: part of the frame falls before the start or after the end of
  // `wave`. This happens for only a few frames per utterance, so a plain
  // element-wise copy is good enough.
  torch::Tensor reflected = torch::empty({window_size}, torch::kFloat);
  auto dst = reflected.accessor<float, 1>();
  auto src = wave.accessor<float, 1>();

  const int32_t n = wave.numel();
  for (int32_t k = 0; k != window_size; ++k) {
    int32_t pos = first + k;

    // Mirror around the boundaries: -1 -> 0, -2 -> 1, n -> n - 1,
    // n + 1 -> n - 2. Repeat until the index is valid, which covers frames
    // longer than the waveform itself.
    while (pos < 0 || pos >= n) {
      pos = (pos < 0) ? (-pos - 1) : (2 * n - 1 - pos);
    }

    dst[k] = src[pos];
  }

  return reflected;
}
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
@ -7,7 +7,8 @@
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/log.h"
|
||||
#include "torch/torch.h"
|
||||
#include "torch/all.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
#ifndef KALDIFEAT_CSRC_FEATURE_WINDOW_H_
|
||||
#define KALDIFEAT_CSRC_FEATURE_WINDOW_H_
|
||||
@ -43,7 +44,11 @@ struct FrameExtractionOptions {
|
||||
bool snip_edges = true;
|
||||
// bool allow_downsample = false;
|
||||
// bool allow_upsample = false;
|
||||
// int32_t max_feature_vectors = -1;
|
||||
|
||||
// Used for streaming feature extraction. It indicates the number
|
||||
// of feature frames to keep in the recycling vector. -1 means to
|
||||
// keep all feature frames.
|
||||
int32_t max_feature_vectors = -1;
|
||||
|
||||
int32_t WindowShift() const {
|
||||
return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
|
||||
@ -57,21 +62,20 @@ struct FrameExtractionOptions {
|
||||
}
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
#define KALDIFEAT_PRINT(x) os << #x << ": " << x << "\n"
|
||||
KALDIFEAT_PRINT(samp_freq);
|
||||
KALDIFEAT_PRINT(frame_shift_ms);
|
||||
KALDIFEAT_PRINT(frame_length_ms);
|
||||
KALDIFEAT_PRINT(dither);
|
||||
KALDIFEAT_PRINT(preemph_coeff);
|
||||
KALDIFEAT_PRINT(remove_dc_offset);
|
||||
KALDIFEAT_PRINT(window_type);
|
||||
KALDIFEAT_PRINT(round_to_power_of_two);
|
||||
KALDIFEAT_PRINT(blackman_coeff);
|
||||
KALDIFEAT_PRINT(snip_edges);
|
||||
// KALDIFEAT_PRINT(allow_downsample);
|
||||
// KALDIFEAT_PRINT(allow_upsample);
|
||||
// KALDIFEAT_PRINT(max_feature_vectors);
|
||||
#undef KALDIFEAT_PRINT
|
||||
os << "FrameExtractionOptions(";
|
||||
os << "samp_freq=" << samp_freq << ", ";
|
||||
os << "frame_shift_ms=" << frame_shift_ms << ", ";
|
||||
os << "frame_length_ms=" << frame_length_ms << ", ";
|
||||
os << "dither=" << dither << ", ";
|
||||
os << "preemph_coeff=" << preemph_coeff << ", ";
|
||||
os << "remove_dc_offset=" << (remove_dc_offset ? "True" : "False") << ", ";
|
||||
os << "window_type=" << '"' << window_type << '"' << ", ";
|
||||
os << "round_to_power_of_two=" << (round_to_power_of_two ? "True" : "False")
|
||||
<< ", ";
|
||||
os << "blackman_coeff=" << blackman_coeff << ", ";
|
||||
os << "snip_edges=" << (snip_edges ? "True" : "False") << ", ";
|
||||
os << "max_feature_vectors=" << max_feature_vectors << ")";
|
||||
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
@ -99,11 +103,11 @@ class FeatureWindowFunction {
|
||||
|
||||
@param [in] flush True if we are asserting that this number of samples
|
||||
is 'all there is', false if we are expecting more data to possibly come in. This
|
||||
only makes a difference to the answer if opts.snips_edges
|
||||
== false. For offline feature extraction you always want flush ==
|
||||
true. In an online-decoding context, once you know (or decide)
|
||||
that no more data is coming in, you'd call it with flush == true at the end
|
||||
to flush out any remaining data.
|
||||
only makes a difference to the answer
|
||||
if opts.snip_edges == false. For offline feature extraction you always want
|
||||
flush == true. In an online-decoding context, once you know (or decide) that
|
||||
no more data is coming in, you'd call it with flush == true at the end to
|
||||
flush out any remaining data.
|
||||
*/
|
||||
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
|
||||
bool flush = true);
|
||||
@ -132,6 +136,29 @@ torch::Tensor Dither(const torch::Tensor &wave, float dither_value);
|
||||
|
||||
torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave);
|
||||
|
||||
/*
|
||||
ExtractWindow() extracts "frame_length" samples from the given waveform.
|
||||
Note: This function only extracts "frame_length" samples
|
||||
from the input waveform, without any further processing.
|
||||
|
||||
@param [in] sample_offset If 'wave' is not the entire waveform, but
|
||||
part of it to the left has been discarded, then the
|
||||
number of samples prior to 'wave' that we have
|
||||
already discarded. Set this to zero if you are
|
||||
processing the entire waveform in one piece, or
|
||||
if you get 'no matching function' compilation
|
||||
errors when updating the code.
|
||||
@param [in] wave The waveform
|
||||
@param [in] f The frame index to be extracted, with
|
||||
0 <= f < NumFrames(sample_offset + wave.numel(), opts, true)
|
||||
@param [in] opts The options class to be used
|
||||
@return Return a tensor containing "frame_length" samples extracted from
|
||||
`wave`, without any further processing. Its shape is
|
||||
(frame_length,), i.e., it is a 1-D tensor.
|
||||
*/
|
||||
torch::Tensor ExtractWindow(int64_t sample_offset, const torch::Tensor &wave,
|
||||
int32_t f, const FrameExtractionOptions &opts);
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
||||
#endif // KALDIFEAT_CSRC_FEATURE_WINDOW_H_
|
||||
|
39
kaldifeat/csrc/generate-whisper-melbank-v3.py
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Write ``whisper-v3-mel-bank.h`` into the current working directory.

    The header contains the mel filterbank returned by
    ``librosa.filters.mel(sr=16000, n_fft=400, n_mels=128)`` flattened
    row by row into a ``constexpr float`` array, plus two constants
    holding the number of rows and columns of the matrix.
    """
    m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=128)
    assert m.shape == (128, 201)

    s = "// Auto-generated. Do NOT edit!\n\n"
    s += "// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)\n\n"
    s += "\n"
    s += "#ifndef KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n"
    s += "#define KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n"
    # The constants below are declared as int32_t, so the generated header
    # has to include <cstdint> itself instead of relying on its includer.
    s += "\n#include <cstdint>\n\n"
    s += "namespace kaldifeat {\n\n"
    s += f"constexpr int32_t kWhisperV3MelRows = {m.shape[0]};\n"
    s += f"constexpr int32_t kWhisperV3MelCols = {m.shape[1]};\n"
    s += "\n"
    s += "constexpr float kWhisperV3MelArray[] = {\n"

    # Emit the values separated by ", " and break the line after every value
    # whose (non-zero) index is a multiple of 7.
    sep = ""
    for i, value in enumerate(m.reshape(-1).tolist()):
        s += f"{sep}{value:.8f}"
        sep = ", "
        if i and i % 7 == 0:
            s += ",\n"
            sep = ""

    s += "};\n\n"
    s += "} // namespace kaldifeat\n\n"
    s += "#endif // KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n"

    with open("whisper-v3-mel-bank.h", "w") as out:
        out.write(s)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
39
kaldifeat/csrc/generate-whisper-melbank.py
Executable file
@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Write ``whisper-mel-bank.h`` into the current working directory.

    The header contains the mel filterbank returned by
    ``librosa.filters.mel(sr=16000, n_fft=400, n_mels=80)`` flattened
    row by row into a ``constexpr float`` array, plus two constants
    holding the number of rows and columns of the matrix.
    """
    m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=80)
    assert m.shape == (80, 201)

    s = "// Auto-generated. Do NOT edit!\n\n"
    s += "// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)\n\n"
    s += "\n"
    s += "#ifndef KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n"
    s += "#define KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n"
    # The constants below are declared as int32_t, so the generated header
    # has to include <cstdint> itself instead of relying on its includer.
    s += "\n#include <cstdint>\n\n"
    s += "namespace kaldifeat {\n\n"
    s += f"constexpr int32_t kWhisperMelRows = {m.shape[0]};\n"
    s += f"constexpr int32_t kWhisperMelCols = {m.shape[1]};\n"
    s += "\n"
    s += "constexpr float kWhisperMelArray[] = {\n"

    # Emit the values separated by ", " and break the line after every value
    # whose (non-zero) index is a multiple of 7.
    sep = ""
    for i, value in enumerate(m.reshape(-1).tolist()):
        s += f"{sep}{value:.8f}"
        sep = ", "
        if i and i % 7 == 0:
            s += ",\n"
            sep = ""

    s += "};\n\n"
    s += "} // namespace kaldifeat\n\n"
    s += "#endif // KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n"

    with open("whisper-mel-bank.h", "w") as out:
        out.write(s)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -5,6 +5,7 @@
|
||||
#ifndef KALDIFEAT_CSRC_LOG_H_
|
||||
#define KALDIFEAT_CSRC_LOG_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
@ -7,7 +7,7 @@
|
||||
#ifndef KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_
|
||||
#define KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_
|
||||
|
||||
#include "torch/torch.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
|
@ -138,7 +138,7 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
|
||||
<< " and vtln-high " << vtln_high << ", versus "
|
||||
<< "low-freq " << low_freq << " and high-freq " << high_freq;
|
||||
|
||||
// we will transpose bins_mat_ at the end of this funciton
|
||||
// we will transpose bins_mat_ at the end of this function
|
||||
bins_mat_ = torch::zeros({num_bins, num_fft_bins}, torch::kFloat);
|
||||
|
||||
int32_t stride = bins_mat_.strides()[0];
|
||||
@ -179,12 +179,14 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
|
||||
last_index = i;
|
||||
}
|
||||
}
|
||||
KALDIFEAT_ASSERT(first_index != -1 && last_index >= first_index &&
|
||||
"You may have set num_mel_bins too large.");
|
||||
|
||||
// Note: It is possible that first_index == last_index == -1 at this line.
|
||||
|
||||
// Replicate a bug in HTK, for testing purposes.
|
||||
if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f)
|
||||
if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f &&
|
||||
first_index != -1) {
|
||||
this_bin[first_index] = 0.0f;
|
||||
}
|
||||
}
|
||||
|
||||
if (debug_) KALDIFEAT_LOG << bins_mat_;
|
||||
@ -196,6 +198,15 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the filterbank from a precomputed, row-major weight matrix with
// `num_rows` rows and `num_cols` columns. The matrix is stored transposed in
// bins_mat_, i.e. with shape [num_cols, num_rows], on `device`.
//
// NOTE(review): torch::from_blob() wraps the given memory without copying
// it. If the later .to(device) call does not create a copy (presumably the
// case when `device` is the CPU), bins_mat_ keeps referring to `weights`,
// so the caller has to keep that buffer alive -- please confirm.
MelBanks::MelBanks(const float *weights, int32_t num_rows, int32_t num_cols,
                   torch::Device device)
    : debug_(false), htk_mode_(false) {
  // from_blob() takes a non-const pointer; the data is only read here.
  float *raw = const_cast<float *>(weights);

  torch::Tensor row_major =
      torch::from_blob(raw, {num_rows, num_cols}, torch::kFloat);

  bins_mat_ = row_major.t().to(device);
}
|
||||
|
||||
// Applies the filterbank: returns the matrix product of `spectrum` and the
// (transposed) weight matrix stored in bins_mat_.
torch::Tensor MelBanks::Compute(const torch::Tensor &spectrum) const {
  torch::Tensor mel_energies = torch::mm(spectrum, bins_mat_);
  return mel_energies;
}
|
||||
|
@ -36,13 +36,14 @@ struct MelBanksOptions {
|
||||
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
os << "num_bins: " << num_bins << "\n";
|
||||
os << "low_freq: " << low_freq << "\n";
|
||||
os << "high_freq: " << high_freq << "\n";
|
||||
os << "vtln_low: " << vtln_low << "\n";
|
||||
os << "vtln_high: " << vtln_high << "\n";
|
||||
os << "debug_mel: " << debug_mel << "\n";
|
||||
os << "htk_mode: " << htk_mode << "\n";
|
||||
os << "MelBanksOptions(";
|
||||
os << "num_bins=" << num_bins << ", ";
|
||||
os << "low_freq=" << low_freq << ", ";
|
||||
os << "high_freq=" << high_freq << ", ";
|
||||
os << "vtln_low=" << vtln_low << ", ";
|
||||
os << "vtln_high=" << vtln_high << ", ";
|
||||
os << "debug_mel=" << (debug_mel ? "True" : "False") << ", ";
|
||||
os << "htk_mode=" << (htk_mode ? "True" : "False") << ")";
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
@ -75,6 +76,17 @@ class MelBanks {
|
||||
const FrameExtractionOptions &frame_opts, float vtln_warp_factor,
|
||||
torch::Device device);
|
||||
|
||||
// Initialize with a 2-d weights matrix
|
||||
//
|
||||
// Note: This constructor is for Whisper. It does not initialize
|
||||
// center_freqs_.
|
||||
//
|
||||
// @param weights Pointer to the start address of the matrix
|
||||
// @param num_rows It equals to number of mel bins
|
||||
// @param num_cols It equals to (number of fft bins)/2+1
|
||||
MelBanks(const float *weights, int32_t num_rows, int32_t num_cols,
|
||||
torch::Device device);
|
||||
|
||||
// CAUTION: we save a transposed version of bins_mat_, so return size(1) here
|
||||
int32_t NumBins() const { return static_cast<int32_t>(bins_mat_.size(1)); }
|
||||
|
||||
@ -88,7 +100,8 @@ class MelBanks {
|
||||
|
||||
private:
|
||||
// A 2-D matrix. Its shape is NOT [num_bins, num_fft_bins]
|
||||
// Its shape is [num_fft_bins, num_bins].
|
||||
// Its shape is [num_fft_bins, num_bins] for non-whisper.
|
||||
// For whisper, its shape is [num_fft_bins/2+1, num_bins]
|
||||
torch::Tensor bins_mat_;
|
||||
|
||||
// center frequencies of bins, numbered from 0 ... num_bins-1.
|
||||
|
89
kaldifeat/csrc/online-feature-itf.h
Normal file
@ -0,0 +1,89 @@
|
||||
// kaldifeat/csrc/online-feature-itf.h
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
// This file is copied/modified from kaldi/src/itf/online-feature-itf.h
|
||||
|
||||
#ifndef KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
|
||||
#define KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
|
||||
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "torch/script.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
class OnlineFeatureInterface {
|
||||
public:
|
||||
virtual ~OnlineFeatureInterface() = default;
|
||||
|
||||
virtual int32_t Dim() const = 0; /// returns the feature dimension.
|
||||
//
|
||||
// Returns frame shift in seconds. Helps to estimate duration from frame
|
||||
// counts.
|
||||
virtual float FrameShiftInSeconds() const = 0;
|
||||
|
||||
/// Returns the total number of frames, since the start of the utterance, that
|
||||
/// are now available. In an online-decoding context, this will likely
|
||||
/// increase with time as more data becomes available.
|
||||
virtual int32_t NumFramesReady() const = 0;
|
||||
|
||||
/// Returns true if this is the last frame. Frame indices are zero-based, so
|
||||
/// the first frame is zero. IsLastFrame(-1) will return false, unless the
|
||||
/// file is empty (which is a case that I'm not sure all the code will handle,
|
||||
/// so be careful). This function may return false for some frame if we
|
||||
/// haven't yet decided to terminate decoding, but later true if we decide to
|
||||
/// terminate decoding. This function exists mainly to correctly handle end
|
||||
/// effects in feature extraction, and is not a mechanism to determine how
|
||||
/// many frames are in the decodable object (as it used to be, and for
|
||||
/// backward compatibility, still is, in the Decodable interface).
|
||||
virtual bool IsLastFrame(int32_t frame) const = 0;
|
||||
|
||||
/// Gets the feature vector for this frame. Before calling this for a given
|
||||
/// frame, it is assumed that you called NumFramesReady() and it returned a
|
||||
/// number greater than "frame". Otherwise this call will likely crash with
|
||||
/// an assert failure. This function is not declared const, in case there is
|
||||
/// some kind of caching going on, but most of the time it shouldn't modify
|
||||
/// the class.
|
||||
///
|
||||
/// The returned tensor has shape (1, Dim()).
|
||||
virtual torch::Tensor GetFrame(int32_t frame) = 0;
|
||||
|
||||
/// This is like GetFrame() but for a collection of frames. There is a
|
||||
/// default implementation that just gets the frames one by one, but it
|
||||
/// may be overridden for efficiency by child classes (since sometimes
|
||||
/// it's more efficient to do things in a batch).
|
||||
///
|
||||
/// The returned tensor has shape (frames.size(), Dim()).
|
||||
virtual std::vector<torch::Tensor> GetFrames(
|
||||
const std::vector<int32_t> &frames) {
|
||||
std::vector<torch::Tensor> features;
|
||||
features.reserve(frames.size());
|
||||
|
||||
for (auto i : frames) {
|
||||
torch::Tensor f = GetFrame(i);
|
||||
features.push_back(std::move(f));
|
||||
}
|
||||
return features;
|
||||
#if 0
|
||||
return torch::cat(features, /*dim*/ 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// This would be called from the application, when you get more wave data.
|
||||
/// Note: the sampling_rate is typically only provided so the code can assert
|
||||
/// that it matches the sampling rate expected in the options.
|
||||
virtual void AcceptWaveform(float sampling_rate,
|
||||
const torch::Tensor &waveform) = 0;
|
||||
|
||||
/// InputFinished() tells the class you won't be providing any
|
||||
/// more waveform. This will help flush out the last few frames
|
||||
/// of delta or LDA features (it will typically affect the return value
|
||||
/// of IsLastFrame.
|
||||
virtual void InputFinished() = 0;
|
||||
};
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
||||
#endif // KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
|
49
kaldifeat/csrc/online-feature-test.cc
Normal file
@ -0,0 +1,49 @@
|
||||
// kaldifeat/csrc/online-feature-test.cc
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
#include "kaldifeat/csrc/online-feature.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
// With items_to_hold == -1 nothing is ever recycled, so every pushed tensor
// must still be retrievable under its original index.
TEST(RecyclingVector, TestUnlimited) {
  constexpr int32_t N = 100;
  RecyclingVector v(-1);

  int32_t pushed = 0;
  while (pushed != N) {
    v.PushBack(torch::tensor({pushed, pushed + 1, pushed + 2}));
    ++pushed;
  }
  ASSERT_EQ(v.Size(), N);

  for (int32_t i = 0; i < N; ++i) {
    torch::Tensor expected = torch::tensor({i, i + 1, i + 2});
    torch::Tensor t = v.At(i);
    EXPECT_TRUE(t.equal(expected));
  }
}
|
||||
|
||||
// With items_to_hold == K only the K most recently pushed tensors are kept:
// Size() still reports the number of pushes, accessing a recycled index
// must terminate the process, and the last K entries must be intact.
TEST(RecyclingVector, Testlimited) {
  constexpr int32_t N = 10;
  constexpr int32_t K = 3;

  RecyclingVector v(K);
  int32_t pushed = 0;
  while (pushed != N) {
    v.PushBack(torch::tensor({pushed, pushed + 1, pushed + 2}));
    ++pushed;
  }

  ASSERT_EQ(v.Size(), N);

  int32_t i = 0;

  // Indexes [0, N - K) have been recycled.
  for (; i < N - K; ++i) {
    ASSERT_DEATH(v.At(i), "");
  }

  // Indexes [N - K, N) are still stored.
  for (; i != N; ++i) {
    torch::Tensor expected = torch::tensor({i, i + 1, i + 2});
    torch::Tensor t = v.At(i);
    EXPECT_TRUE(t.equal(expected));
  }
}
|
||||
|
||||
} // namespace kaldifeat
|
133
kaldifeat/csrc/online-feature.cc
Normal file
@ -0,0 +1,133 @@
|
||||
// kaldifeat/csrc/online-feature.cc
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/online-feature.cc
|
||||
|
||||
#include "kaldifeat/csrc/online-feature.h"
|
||||
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/csrc/log.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
// `items_to_hold` is the maximum number of items kept in memory. A value of
// 0 is mapped to -1, which PushBack() treats as "never remove anything".
RecyclingVector::RecyclingVector(int32_t items_to_hold)
    : items_to_hold_(items_to_hold != 0 ? items_to_hold : -1),
      first_available_index_(0) {}
|
||||
|
||||
// Returns the item that was pushed as the `index`-th element (zero-based,
// counted as if nothing had ever been recycled). Reports an error through
// KALDIFEAT_ERR when that item has already been removed.
torch::Tensor RecyclingVector::At(int32_t index) const {
  const bool already_recycled = index < first_available_index_;
  if (already_recycled) {
    KALDIFEAT_ERR << "Attempted to retrieve feature vector that was "
                  << "already removed by the RecyclingVector (index = "
                  << index << "; first_available_index = "
                  << first_available_index_ << "; size = " << Size() << ")";
  }

  // Translate the logical index into a position inside the deque;
  // std::deque::at() performs the upper-bound check.
  const int32_t position = index - first_available_index_;
  return items_.at(position);
}
|
||||
|
||||
// Appends `item`. If the container already holds items_to_hold_ items, the
// oldest one is dropped first and first_available_index_ is advanced.
void RecyclingVector::PushBack(torch::Tensor item) {
  // A negative items_to_hold_ (e.g. -1) becomes a very large value when
  // converted to size_t, so in that case the limit is never reached.
  const size_t capacity = static_cast<size_t>(items_to_hold_);
  const bool is_full = (items_.size() == capacity);

  if (is_full) {
    ++first_available_index_;
    items_.pop_front();
  }

  items_.push_back(item);
}
|
||||
|
||||
int32_t RecyclingVector::Size() const {
|
||||
return first_available_index_ + static_cast<int32_t>(items_.size());
|
||||
}
|
||||
|
||||
template <class C>
|
||||
OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
|
||||
const typename C::Options &opts)
|
||||
: computer_(opts),
|
||||
window_function_(opts.frame_opts, opts.device),
|
||||
features_(opts.frame_opts.max_feature_vectors),
|
||||
input_finished_(false),
|
||||
waveform_offset_(0) {}
|
||||
|
||||
template <class C>
|
||||
void OnlineGenericBaseFeature<C>::AcceptWaveform(
|
||||
float sampling_rate, const torch::Tensor &original_waveform) {
|
||||
if (original_waveform.numel() == 0) return; // Nothing to do.
|
||||
|
||||
KALDIFEAT_ASSERT(original_waveform.dim() == 1);
|
||||
KALDIFEAT_ASSERT(sampling_rate == computer_.GetFrameOptions().samp_freq);
|
||||
|
||||
if (input_finished_)
|
||||
KALDIFEAT_ERR << "AcceptWaveform called after InputFinished() was called.";
|
||||
|
||||
if (waveform_remainder_.numel() == 0) {
|
||||
waveform_remainder_ = original_waveform;
|
||||
} else {
|
||||
waveform_remainder_ =
|
||||
torch::cat({waveform_remainder_, original_waveform}, /*dim*/ 0);
|
||||
}
|
||||
|
||||
ComputeFeatures();
|
||||
}
|
||||
|
||||
template <class C>
|
||||
void OnlineGenericBaseFeature<C>::InputFinished() {
|
||||
input_finished_ = true;
|
||||
ComputeFeatures();
|
||||
}
|
||||
|
||||
template <class C>
|
||||
void OnlineGenericBaseFeature<C>::ComputeFeatures() {
|
||||
const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
|
||||
|
||||
int64_t num_samples_total = waveform_offset_ + waveform_remainder_.numel();
|
||||
int32_t num_frames_old = features_.Size();
|
||||
int32_t num_frames_new =
|
||||
NumFrames(num_samples_total, frame_opts, input_finished_);
|
||||
|
||||
KALDIFEAT_ASSERT(num_frames_new >= num_frames_old);
|
||||
|
||||
// note: this online feature-extraction code does not support VTLN.
|
||||
float vtln_warp = 1.0;
|
||||
|
||||
for (int32_t frame = num_frames_old; frame < num_frames_new; ++frame) {
|
||||
torch::Tensor window =
|
||||
ExtractWindow(waveform_offset_, waveform_remainder_, frame, frame_opts);
|
||||
|
||||
// TODO(fangjun): We can compute all feature frames at once
|
||||
torch::Tensor this_feature =
|
||||
computer_.ComputeFeatures(window.unsqueeze(0), vtln_warp);
|
||||
features_.PushBack(this_feature);
|
||||
}
|
||||
|
||||
// OK, we will now discard any portion of the signal that will not be
|
||||
// necessary to compute frames in the future.
|
||||
int64_t first_sample_of_next_frame =
|
||||
FirstSampleOfFrame(num_frames_new, frame_opts);
|
||||
int32_t samples_to_discard = first_sample_of_next_frame - waveform_offset_;
|
||||
if (samples_to_discard > 0) {
|
||||
// discard the leftmost part of the waveform that we no longer need.
|
||||
int32_t new_num_samples = waveform_remainder_.numel() - samples_to_discard;
|
||||
if (new_num_samples <= 0) {
|
||||
// odd, but we'll try to handle it.
|
||||
waveform_offset_ += waveform_remainder_.numel();
|
||||
waveform_remainder_.resize_({0});
|
||||
} else {
|
||||
using torch::indexing::None;
|
||||
using torch::indexing::Slice;
|
||||
|
||||
waveform_remainder_ =
|
||||
waveform_remainder_.index({Slice(samples_to_discard, None)});
|
||||
|
||||
waveform_offset_ += samples_to_discard;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// instantiate the templates defined here for MFCC, PLP and filterbank classes.
|
||||
template class OnlineGenericBaseFeature<Mfcc>;
|
||||
template class OnlineGenericBaseFeature<Plp>;
|
||||
template class OnlineGenericBaseFeature<Fbank>;
|
||||
|
||||
} // namespace kaldifeat
|
127
kaldifeat/csrc/online-feature.h
Normal file
@ -0,0 +1,127 @@
|
||||
// kaldifeat/csrc/online-feature.h
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
// This file is copied/modified from kaldi/src/feat/online-feature.h
|
||||
|
||||
#ifndef KALDIFEAT_CSRC_ONLINE_FEATURE_H_
|
||||
#define KALDIFEAT_CSRC_ONLINE_FEATURE_H_
|
||||
|
||||
#include <deque>
|
||||
|
||||
#include "kaldifeat/csrc/feature-fbank.h"
|
||||
#include "kaldifeat/csrc/feature-mfcc.h"
|
||||
#include "kaldifeat/csrc/feature-plp.h"
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/csrc/online-feature-itf.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
/// This class serves as a storage for feature vectors with an option to limit
|
||||
/// the memory usage by removing old elements. The deleted frames indices are
|
||||
/// "remembered" so that regardless of the MAX_ITEMS setting, the user always
|
||||
/// provides the indices as if no deletion was being performed.
|
||||
/// This is useful when processing very long recordings which would otherwise
|
||||
/// cause the memory to eventually blow up when the features are not being
|
||||
/// removed.
|
||||
class RecyclingVector {
|
||||
public:
|
||||
/// By default it does not remove any elements.
|
||||
explicit RecyclingVector(int32_t items_to_hold = -1);
|
||||
|
||||
~RecyclingVector() = default;
|
||||
RecyclingVector(const RecyclingVector &) = delete;
|
||||
RecyclingVector &operator=(const RecyclingVector &) = delete;
|
||||
|
||||
torch::Tensor At(int32_t index) const;
|
||||
|
||||
void PushBack(torch::Tensor item);
|
||||
|
||||
/// This method returns the size as if no "recycling" had happened,
|
||||
/// i.e. equivalent to the number of times the PushBack method has been
|
||||
/// called.
|
||||
int32_t Size() const;
|
||||
|
||||
private:
|
||||
std::deque<torch::Tensor> items_;
|
||||
int32_t items_to_hold_;
|
||||
int32_t first_available_index_;
|
||||
};
|
||||
|
||||
/// This is a templated class for online feature extraction;
|
||||
/// it's templated on a class like Mfcc, Plp or Fbank
|
||||
/// that does the basic feature extraction.
|
||||
template <class C>
|
||||
class OnlineGenericBaseFeature : public OnlineFeatureInterface {
|
||||
public:
|
||||
// Constructor from options class
|
||||
explicit OnlineGenericBaseFeature(const typename C::Options &opts);
|
||||
|
||||
int32_t Dim() const override { return computer_.Dim(); }
|
||||
|
||||
float FrameShiftInSeconds() const override {
|
||||
return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
|
||||
}
|
||||
|
||||
int32_t NumFramesReady() const override { return features_.Size(); }
|
||||
|
||||
// Note: IsLastFrame() will only ever return true if you have called
|
||||
// InputFinished() (and this frame is the last frame).
|
||||
bool IsLastFrame(int32_t frame) const override {
|
||||
return input_finished_ && frame == NumFramesReady() - 1;
|
||||
}
|
||||
|
||||
torch::Tensor GetFrame(int32_t frame) override { return features_.At(frame); }
|
||||
|
||||
// This would be called from the application, when you get
|
||||
// more wave data. Note: the sampling_rate is only provided so
|
||||
// the code can assert that it matches the sampling rate
|
||||
// expected in the options.
|
||||
void AcceptWaveform(float sampling_rate,
|
||||
const torch::Tensor &waveform) override;
|
||||
|
||||
// InputFinished() tells the class you won't be providing any
|
||||
// more waveform. This will help flush out the last frame or two
|
||||
// of features, in the case where snip-edges == false; it also
|
||||
// affects the return value of IsLastFrame().
|
||||
void InputFinished() override;
|
||||
|
||||
private:
|
||||
// This function computes any additional feature frames that it is possible to
|
||||
// compute from 'waveform_remainder_', which at this point may contain more
|
||||
// than just a remainder-sized quantity (because AcceptWaveform() appends to
|
||||
// waveform_remainder_ before calling this function). It adds these feature
|
||||
// frames to features_, and shifts off any now-unneeded samples of input from
|
||||
// waveform_remainder_ while incrementing waveform_offset_ by the same amount.
|
||||
void ComputeFeatures();
|
||||
|
||||
C computer_; // class that does the MFCC or PLP or filterbank computation
|
||||
|
||||
FeatureWindowFunction window_function_;
|
||||
|
||||
// features_ is the Mfcc or Plp or Fbank features that we have already
|
||||
// computed.
|
||||
|
||||
RecyclingVector features_;
|
||||
|
||||
// True if the user has called "InputFinished()"
|
||||
bool input_finished_;
|
||||
|
||||
// waveform_offset_ is the number of samples of waveform that we have
|
||||
// already discarded, i.e. that were prior to 'waveform_remainder_'.
|
||||
int64_t waveform_offset_;
|
||||
|
||||
// waveform_remainder_ is a short piece of waveform that we may need to keep
|
||||
// after extracting all the whole frames we can (whatever length of feature
|
||||
// will be required for the next phase of computation).
|
||||
// It is a 1-D tensor
|
||||
torch::Tensor waveform_remainder_;
|
||||
};
|
||||
|
||||
using OnlineMfcc = OnlineGenericBaseFeature<Mfcc>;
|
||||
using OnlinePlp = OnlineGenericBaseFeature<Plp>;
|
||||
using OnlineFbank = OnlineGenericBaseFeature<Fbank>;
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
||||
#endif // KALDIFEAT_CSRC_ONLINE_FEATURE_H_
|
@ -20,7 +20,7 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "torch/torch.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
|
@ -2,7 +2,8 @@
|
||||
//
|
||||
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
#include "torch/torch.h"
|
||||
#include "torch/all.h"
|
||||
#include "torch/script.h"
|
||||
|
||||
static void TestPreemph() {
|
||||
torch::Tensor a = torch::arange(0, 12).reshape({3, 4}).to(torch::kFloat);
|
||||
|
88
kaldifeat/csrc/whisper-fbank.cc
Normal file
@ -0,0 +1,88 @@
|
||||
/**
|
||||
* Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kaldifeat/csrc/whisper-fbank.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldifeat/csrc/mel-computations.h"
|
||||
#include "kaldifeat/csrc/whisper-mel-bank.h"
|
||||
#include "kaldifeat/csrc/whisper-v3-mel-bank.h"
|
||||
|
||||
#ifndef M_2PI
|
||||
#define M_2PI 6.283185307179586476925286766559005
|
||||
#endif
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
// Creates the computer from `opts`.
//
// opts.num_mels selects the mel filter table (80 or 128; anything else is
// reported through KALDIFEAT_ERR) and opts.device is forwarded to MelBanks.
// The frame extraction options listed at the end of the constructor are then
// overwritten with fixed values, regardless of what the caller supplied.
WhisperFbankComputer::WhisperFbankComputer(const WhisperFbankOptions &opts)
    : opts_(opts) {
  switch (opts.num_mels) {
    case 80: {
      mel_banks_ = std::make_unique<MelBanks>(kWhisperMelArray, kWhisperMelRows,
                                              kWhisperMelCols, opts.device);
      break;
    }
    case 128: {
      mel_banks_ = std::make_unique<MelBanks>(kWhisperV3MelArray,
                                              kWhisperV3MelRows,
                                              kWhisperV3MelCols, opts.device);
      break;
    }
    default: {
      KALDIFEAT_ERR << "Unsupported num_mels: " << opts.num_mels
                    << ". Support only 80 and 128";
      break;
    }
  }

  // Fixed framing configuration used by this computer.
  FrameExtractionOptions &frame_opts = opts_.frame_opts;
  frame_opts.samp_freq = 16000;
  frame_opts.frame_shift_ms = 10;
  frame_opts.frame_length_ms = 25;
  frame_opts.dither = 0;
  frame_opts.preemph_coeff = 0;
  frame_opts.remove_dc_offset = false;
  frame_opts.window_type = "hann";
  frame_opts.round_to_power_of_two = false;
  frame_opts.snip_edges = false;
}
|
||||
|
||||
// Computes normalized log-mel features for a batch of windowed frames.
//
// @param signal_raw_log_energy  Unused.
// @param vtln_warp              Unused.
// @param signal_frame  2-D tensor; its second dimension must equal
//                      opts_.frame_opts.PaddedWindowSize().
// @return The value (log10(max(mel, 1e-10)) floored at (global max - 8)
//         + 4) / 4, computed from the mel energies of `signal_frame`.
torch::Tensor WhisperFbankComputer::Compute(
    torch::Tensor /*signal_raw_log_energy*/, float /*vtln_warp*/,
    const torch::Tensor &signal_frame) {
  KALDIFEAT_ASSERT(signal_frame.dim() == 2);
  KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize());

  // Note: this is the *power* spectrum (squared magnitude), not the
  // magnitude spectrum: both branches below square the FFT output.
  //
  // NOTE(review): with the frame options forced by the constructor
  // (16 kHz, 25 ms, round_to_power_of_two = false) a frame is presumably
  // 400 samples long, giving 400 / 2 + 1 = 201 FFT bins -- confirm against
  // PaddedWindowSize().
#if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
  // signal_frame shape: [x, padded_window_size]
  // power shape:        [x, padded_window_size / 2 + 1]
  torch::Tensor power = torch::fft::rfft(signal_frame).abs().pow(2);
#else
  // signal_frame shape: [x, padded_window_size]
  // real_imag shape:    [x, padded_window_size / 2 + 1, 2],
  //   where [..., 0] is the real part
  //         [..., 1] is the imaginary part
  torch::Tensor real_imag = torch::rfft(signal_frame, 1);
  torch::Tensor real = real_imag.index({"...", 0});
  torch::Tensor imag = real_imag.index({"...", 1});
  torch::Tensor power = (real.square() + imag.square());
#endif

  torch::Tensor mel_energies = mel_banks_->Compute(power);

  // Floor the energies before taking the logarithm to avoid log10(0).
  torch::Tensor log_spec = torch::clamp_min(mel_energies, 1e-10).log10();

  // Limit the dynamic range to 8 (in log10 units) below the maximum.
  // log_spec.max() reduces over the whole tensor, so the floor depends on
  // every frame passed to this call, not on each frame individually.
  log_spec = torch::maximum(log_spec, log_spec.max() - 8.0);

  // Final affine rescaling; presumably mirrors the reference Whisper
  // log-mel normalization -- TODO confirm.
  torch::Tensor mel = (log_spec + 4.0) / 4.0;

  return mel;
}
|
||||
|
||||
} // namespace kaldifeat
|
78
kaldifeat/csrc/whisper-fbank.h
Normal file
@ -0,0 +1,78 @@
|
||||
/**
|
||||
* Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
*
|
||||
* See LICENSE for clarification regarding multiple authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef KALDIFEAT_CSRC_WHISPER_FBANK_H_
|
||||
#define KALDIFEAT_CSRC_WHISPER_FBANK_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "kaldifeat/csrc/feature-common.h"
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/csrc/mel-computations.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
struct WhisperFbankOptions {
|
||||
FrameExtractionOptions frame_opts;
|
||||
// for large v3, please use 128
|
||||
int32_t num_mels = 80;
|
||||
|
||||
torch::Device device{"cpu"};
|
||||
std::string ToString() const {
|
||||
std::ostringstream os;
|
||||
os << "WhisperFbankOptions(";
|
||||
os << "frame_opts=" << frame_opts.ToString() << ", ";
|
||||
os << "num_mels=" << num_mels << ", ";
|
||||
os << "device=\"" << device << "\")";
|
||||
return os.str();
|
||||
}
|
||||
};
|
||||
|
||||
class WhisperFbankComputer {
|
||||
public:
|
||||
// note: Only frame_opts.device is used. All other fields from frame_opts
|
||||
// are ignored
|
||||
explicit WhisperFbankComputer(const WhisperFbankOptions &opts = {});
|
||||
|
||||
int32_t Dim() const { return opts_.num_mels; }
|
||||
|
||||
const FrameExtractionOptions &GetFrameOptions() const {
|
||||
return opts_.frame_opts;
|
||||
}
|
||||
|
||||
const WhisperFbankOptions &GetOptions() const { return opts_; }
|
||||
|
||||
torch::Tensor Compute(torch::Tensor /*signal_raw_log_energy*/,
|
||||
float /*vtln_warp*/, const torch::Tensor &signal_frame);
|
||||
|
||||
// if true, compute log_energy_pre_window but after dithering and dc removal
|
||||
bool NeedRawLogEnergy() const { return false; }
|
||||
using Options = WhisperFbankOptions;
|
||||
|
||||
private:
|
||||
WhisperFbankOptions opts_;
|
||||
std::unique_ptr<MelBanks> mel_banks_;
|
||||
};
|
||||
|
||||
using WhisperFbank = OfflineFeatureTpl<WhisperFbankComputer>;
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
||||
#endif // KALDIFEAT_CSRC_WHISPER_FBANK_H_
|
2315
kaldifeat/csrc/whisper-mel-bank.h
Normal file
3693
kaldifeat/csrc/whisper-v3-mel-bank.h
Normal file
@ -1,2 +1,5 @@
|
||||
add_subdirectory(csrc)
|
||||
add_subdirectory(tests)
|
||||
|
||||
if(kaldifeat_BUILD_TESTS)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
@ -7,9 +7,34 @@ pybind11_add_module(_kaldifeat
|
||||
feature-window.cc
|
||||
kaldifeat.cc
|
||||
mel-computations.cc
|
||||
online-feature.cc
|
||||
utils.cc
|
||||
whisper-fbank.cc
|
||||
)
|
||||
|
||||
if(APPLE)
|
||||
execute_process(
|
||||
COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
|
||||
)
|
||||
message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
|
||||
target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
|
||||
endif()
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${kaldifeat_rpath_origin}/kaldifeat/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
target_link_libraries(_kaldifeat PRIVATE kaldifeat_core)
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/libtorch_python.so)
|
||||
target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARY})
|
||||
# target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARY})
|
||||
elseif(WIN32)
|
||||
target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/torch_python.lib)
|
||||
# target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARIES})
|
||||
endif()
|
||||
|
||||
install(TARGETS _kaldifeat
|
||||
DESTINATION ../
|
||||
)
|
||||
|
@ -4,9 +4,11 @@
|
||||
|
||||
#include "kaldifeat/python/csrc/feature-fbank.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/feature-fbank.h"
|
||||
#include "kaldifeat/python/csrc/utils.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
@ -14,6 +16,35 @@ static void PybindFbankOptions(py::module &m) {
|
||||
using PyClass = FbankOptions;
|
||||
py::class_<PyClass>(m, "FbankOptions")
|
||||
.def(py::init<>())
|
||||
.def(py::init([](const MelBanksOptions &mel_opts,
|
||||
const FrameExtractionOptions &frame_opts =
|
||||
FrameExtractionOptions(),
|
||||
bool use_energy = false, float energy_floor = 0.0f,
|
||||
bool raw_energy = true, bool htk_compat = false,
|
||||
bool use_log_fbank = true, bool use_power = true,
|
||||
py::object device =
|
||||
py::str("cpu")) -> std::unique_ptr<FbankOptions> {
|
||||
auto opts = std::make_unique<FbankOptions>();
|
||||
opts->frame_opts = frame_opts;
|
||||
opts->mel_opts = mel_opts;
|
||||
opts->use_energy = use_energy;
|
||||
opts->energy_floor = energy_floor;
|
||||
opts->raw_energy = raw_energy;
|
||||
opts->htk_compat = htk_compat;
|
||||
opts->use_log_fbank = use_log_fbank;
|
||||
opts->use_power = use_power;
|
||||
|
||||
std::string s = static_cast<py::str>(device);
|
||||
opts->device = torch::Device(s);
|
||||
|
||||
return opts;
|
||||
}),
|
||||
py::arg("mel_opts"),
|
||||
py::arg("frame_opts") = FrameExtractionOptions(),
|
||||
py::arg("use_energy") = false, py::arg("energy_floor") = 0.0f,
|
||||
py::arg("raw_energy") = true, py::arg("htk_compat") = false,
|
||||
py::arg("use_log_fbank") = true, py::arg("use_power") = true,
|
||||
py::arg("device") = py::str("cpu"))
|
||||
.def_readwrite("frame_opts", &PyClass::frame_opts)
|
||||
.def_readwrite("mel_opts", &PyClass::mel_opts)
|
||||
.def_readwrite("use_energy", &PyClass::use_energy)
|
||||
@ -33,7 +64,15 @@ static void PybindFbankOptions(py::module &m) {
|
||||
self.device = torch::Device(s);
|
||||
})
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); });
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static(
|
||||
"from_dict",
|
||||
[](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
|
||||
}
|
||||
|
||||
static void PybindFbank(py::module &m) {
|
||||
@ -43,7 +82,14 @@ static void PybindFbank(py::module &m) {
|
||||
.def("dim", &PyClass::Dim)
|
||||
.def_property_readonly("options", &PyClass::GetOptions)
|
||||
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
|
||||
py::arg("vtln_warp"));
|
||||
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict {
|
||||
return AsDict(self.GetOptions());
|
||||
},
|
||||
[](py::dict dict) -> std::unique_ptr<PyClass> {
|
||||
return std::make_unique<PyClass>(FbankOptionsFromDict(dict));
|
||||
}));
|
||||
}
|
||||
|
||||
void PybindFeatureFbank(py::module &m) {
|
||||
|
@ -4,9 +4,11 @@
|
||||
|
||||
#include "kaldifeat/python/csrc/feature-mfcc.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/feature-mfcc.h"
|
||||
#include "kaldifeat/python/csrc/utils.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
@ -14,6 +16,35 @@ void PybindMfccOptions(py::module &m) {
|
||||
using PyClass = MfccOptions;
|
||||
py::class_<PyClass>(m, "MfccOptions")
|
||||
.def(py::init<>())
|
||||
.def(py::init([](const MelBanksOptions &mel_opts,
|
||||
const FrameExtractionOptions &frame_opts =
|
||||
FrameExtractionOptions(),
|
||||
int32_t num_ceps = 13, bool use_energy = true,
|
||||
float energy_floor = 0.0, bool raw_energy = true,
|
||||
float cepstral_lifter = 22.0, bool htk_compat = false,
|
||||
py::object device =
|
||||
py::str("cpu")) -> std::unique_ptr<MfccOptions> {
|
||||
auto opts = std::make_unique<MfccOptions>();
|
||||
opts->frame_opts = frame_opts;
|
||||
opts->mel_opts = mel_opts;
|
||||
opts->num_ceps = num_ceps;
|
||||
opts->use_energy = use_energy;
|
||||
opts->energy_floor = energy_floor;
|
||||
opts->raw_energy = raw_energy;
|
||||
opts->cepstral_lifter = cepstral_lifter;
|
||||
opts->htk_compat = htk_compat;
|
||||
|
||||
std::string s = static_cast<py::str>(device);
|
||||
opts->device = torch::Device(s);
|
||||
|
||||
return opts;
|
||||
}),
|
||||
py::arg("mel_opts"),
|
||||
py::arg("frame_opts") = FrameExtractionOptions(),
|
||||
py::arg("num_ceps") = 13, py::arg("use_energy") = true,
|
||||
py::arg("energy_floor") = 0.0f, py::arg("raw_energy") = true,
|
||||
py::arg("cepstral_lifter") = 22.0, py::arg("htk_compat") = false,
|
||||
py::arg("device") = py::str("cpu"))
|
||||
.def_readwrite("frame_opts", &PyClass::frame_opts)
|
||||
.def_readwrite("mel_opts", &PyClass::mel_opts)
|
||||
.def_readwrite("num_ceps", &PyClass::num_ceps)
|
||||
@ -33,7 +64,15 @@ void PybindMfccOptions(py::module &m) {
|
||||
self.device = torch::Device(s);
|
||||
})
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); });
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static(
|
||||
"from_dict",
|
||||
[](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); })
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); }));
|
||||
}
|
||||
|
||||
static void PybindMfcc(py::module &m) {
|
||||
@ -43,7 +82,14 @@ static void PybindMfcc(py::module &m) {
|
||||
.def("dim", &PyClass::Dim)
|
||||
.def_property_readonly("options", &PyClass::GetOptions)
|
||||
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
|
||||
py::arg("vtln_warp"));
|
||||
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict {
|
||||
return AsDict(self.GetOptions());
|
||||
},
|
||||
[](py::dict dict) -> std::unique_ptr<PyClass> {
|
||||
return std::make_unique<PyClass>(MfccOptionsFromDict(dict));
|
||||
}));
|
||||
}
|
||||
|
||||
void PybindFeatureMfcc(py::module &m) {
|
||||
|
@ -4,9 +4,11 @@
|
||||
|
||||
#include "kaldifeat/python/csrc/feature-plp.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/feature-plp.h"
|
||||
#include "kaldifeat/python/csrc/utils.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
@ -14,6 +16,41 @@ void PybindPlpOptions(py::module &m) {
|
||||
using PyClass = PlpOptions;
|
||||
py::class_<PyClass>(m, "PlpOptions")
|
||||
.def(py::init<>())
|
||||
.def(py::init([](const MelBanksOptions &mel_opts,
|
||||
const FrameExtractionOptions &frame_opts =
|
||||
FrameExtractionOptions(),
|
||||
int32_t lpc_order = 12, int32_t num_ceps = 13,
|
||||
bool use_energy = true, float energy_floor = 0.0,
|
||||
bool raw_energy = true, float compress_factor = 0.33333,
|
||||
int32_t cepstral_lifter = 22, float cepstral_scale = 1.0,
|
||||
bool htk_compat = false,
|
||||
py::object device =
|
||||
py::str("cpu")) -> std::unique_ptr<PlpOptions> {
|
||||
auto opts = std::make_unique<PlpOptions>();
|
||||
opts->frame_opts = frame_opts;
|
||||
opts->mel_opts = mel_opts;
|
||||
opts->lpc_order = lpc_order;
|
||||
opts->num_ceps = num_ceps;
|
||||
opts->use_energy = use_energy;
|
||||
opts->energy_floor = energy_floor;
|
||||
opts->raw_energy = raw_energy;
|
||||
opts->compress_factor = compress_factor;
|
||||
opts->cepstral_lifter = cepstral_lifter;
|
||||
opts->cepstral_scale = cepstral_scale;
|
||||
opts->htk_compat = htk_compat;
|
||||
|
||||
std::string s = static_cast<py::str>(device);
|
||||
opts->device = torch::Device(s);
|
||||
|
||||
return opts;
|
||||
}),
|
||||
py::arg("mel_opts"),
|
||||
py::arg("frame_opts") = FrameExtractionOptions(),
|
||||
py::arg("lpc_order") = 12, py::arg("num_ceps") = 13,
|
||||
py::arg("use_energy") = true, py::arg("energy_floor") = 0.0,
|
||||
py::arg("raw_energy") = true, py::arg("compress_factor") = 0.33333,
|
||||
py::arg("cepstral_lifter") = 22, py::arg("cepstral_scale") = 1.0,
|
||||
py::arg("htk_compat") = false, py::arg("device") = py::str("cpu"))
|
||||
.def_readwrite("frame_opts", &PyClass::frame_opts)
|
||||
.def_readwrite("mel_opts", &PyClass::mel_opts)
|
||||
.def_readwrite("lpc_order", &PyClass::lpc_order)
|
||||
@ -36,7 +73,15 @@ void PybindPlpOptions(py::module &m) {
|
||||
self.device = torch::Device(s);
|
||||
})
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); });
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static(
|
||||
"from_dict",
|
||||
[](py::dict dict) -> PyClass { return PlpOptionsFromDict(dict); })
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass { return PlpOptionsFromDict(dict); }));
|
||||
}
|
||||
|
||||
static void PybindPlp(py::module &m) {
|
||||
@ -46,7 +91,14 @@ static void PybindPlp(py::module &m) {
|
||||
.def("dim", &PyClass::Dim)
|
||||
.def_property_readonly("options", &PyClass::GetOptions)
|
||||
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
|
||||
py::arg("vtln_warp"));
|
||||
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict {
|
||||
return AsDict(self.GetOptions());
|
||||
},
|
||||
[](py::dict dict) -> std::unique_ptr<PyClass> {
|
||||
return std::make_unique<PyClass>(PlpOptionsFromDict(dict));
|
||||
}));
|
||||
}
|
||||
|
||||
void PybindFeaturePlp(py::module &m) {
|
||||
|
@ -4,16 +4,38 @@
|
||||
|
||||
#include "kaldifeat/python/csrc/feature-spectrogram.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/feature-spectrogram.h"
|
||||
#include "kaldifeat/python/csrc/utils.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
static void PybindSpectrogramOptions(py::module &m) {
|
||||
using PyClass = SpectrogramOptions;
|
||||
py::class_<PyClass>(m, "SpectrogramOptions")
|
||||
.def(py::init<>())
|
||||
.def(py::init([](const FrameExtractionOptions &frame_opts =
|
||||
FrameExtractionOptions(),
|
||||
float energy_floor = 0.0, bool raw_energy = true,
|
||||
bool return_raw_fft = false,
|
||||
py::object device = py::str(
|
||||
"cpu")) -> std::unique_ptr<SpectrogramOptions> {
|
||||
auto opts = std::make_unique<SpectrogramOptions>();
|
||||
opts->frame_opts = frame_opts;
|
||||
opts->energy_floor = energy_floor;
|
||||
opts->raw_energy = raw_energy;
|
||||
opts->return_raw_fft = return_raw_fft;
|
||||
|
||||
std::string s = static_cast<py::str>(device);
|
||||
opts->device = torch::Device(s);
|
||||
|
||||
return opts;
|
||||
}),
|
||||
py::arg("frame_opts") = FrameExtractionOptions(),
|
||||
py::arg("energy_floor") = 0.0, py::arg("raw_energy") = true,
|
||||
py::arg("return_raw_fft") = false,
|
||||
py::arg("device") = py::str("cpu"))
|
||||
.def_readwrite("frame_opts", &PyClass::frame_opts)
|
||||
.def_readwrite("energy_floor", &PyClass::energy_floor)
|
||||
.def_readwrite("raw_energy", &PyClass::raw_energy)
|
||||
@ -30,7 +52,18 @@ static void PybindSpectrogramOptions(py::module &m) {
|
||||
self.device = torch::Device(s);
|
||||
})
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); });
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static("from_dict",
|
||||
[](py::dict dict) -> PyClass {
|
||||
return SpectrogramOptionsFromDict(dict);
|
||||
})
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass {
|
||||
return SpectrogramOptionsFromDict(dict);
|
||||
}));
|
||||
}
|
||||
|
||||
static void PybindSpectrogram(py::module &m) {
|
||||
@ -40,7 +73,14 @@ static void PybindSpectrogram(py::module &m) {
|
||||
.def("dim", &PyClass::Dim)
|
||||
.def_property_readonly("options", &PyClass::GetOptions)
|
||||
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
|
||||
py::arg("vtln_warp"));
|
||||
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict {
|
||||
return AsDict(self.GetOptions());
|
||||
},
|
||||
[](py::dict dict) -> std::unique_ptr<PyClass> {
|
||||
return std::make_unique<PyClass>(SpectrogramOptionsFromDict(dict));
|
||||
}));
|
||||
}
|
||||
|
||||
void PybindFeatureSpectrogram(py::module &m) {
|
||||
|
@ -4,38 +4,78 @@
|
||||
|
||||
#include "kaldifeat/python/csrc/feature-window.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/python/csrc/utils.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
static void PybindFrameExtractionOptions(py::module &m) {
|
||||
py::class_<FrameExtractionOptions>(m, "FrameExtractionOptions")
|
||||
.def(py::init<>())
|
||||
.def_readwrite("samp_freq", &FrameExtractionOptions::samp_freq)
|
||||
.def_readwrite("frame_shift_ms", &FrameExtractionOptions::frame_shift_ms)
|
||||
.def_readwrite("frame_length_ms",
|
||||
&FrameExtractionOptions::frame_length_ms)
|
||||
.def_readwrite("dither", &FrameExtractionOptions::dither)
|
||||
.def_readwrite("preemph_coeff", &FrameExtractionOptions::preemph_coeff)
|
||||
.def_readwrite("remove_dc_offset",
|
||||
&FrameExtractionOptions::remove_dc_offset)
|
||||
.def_readwrite("window_type", &FrameExtractionOptions::window_type)
|
||||
.def_readwrite("round_to_power_of_two",
|
||||
&FrameExtractionOptions::round_to_power_of_two)
|
||||
.def_readwrite("blackman_coeff", &FrameExtractionOptions::blackman_coeff)
|
||||
.def_readwrite("snip_edges", &FrameExtractionOptions::snip_edges)
|
||||
using PyClass = FrameExtractionOptions;
|
||||
py::class_<PyClass>(m, "FrameExtractionOptions")
|
||||
.def(
|
||||
py::init([](float samp_freq = 16000, float frame_shift_ms = 10.0f,
|
||||
float frame_length_ms = 25.0f, float dither = 1.0f,
|
||||
float preemph_coeff = 0.97f, bool remove_dc_offset = true,
|
||||
const std::string &window_type = "povey",
|
||||
bool round_to_power_of_two = true,
|
||||
float blackman_coeff = 0.42f, bool snip_edges = true,
|
||||
int32_t max_feature_vectors =
|
||||
-1) -> std::unique_ptr<FrameExtractionOptions> {
|
||||
auto opts = std::make_unique<FrameExtractionOptions>();
|
||||
|
||||
opts->samp_freq = samp_freq;
|
||||
opts->frame_shift_ms = frame_shift_ms;
|
||||
opts->frame_length_ms = frame_length_ms;
|
||||
opts->dither = dither;
|
||||
opts->preemph_coeff = preemph_coeff;
|
||||
opts->remove_dc_offset = remove_dc_offset;
|
||||
opts->window_type = window_type;
|
||||
opts->round_to_power_of_two = round_to_power_of_two;
|
||||
opts->blackman_coeff = blackman_coeff;
|
||||
opts->snip_edges = snip_edges;
|
||||
opts->max_feature_vectors = max_feature_vectors;
|
||||
|
||||
return opts;
|
||||
}),
|
||||
py::arg("samp_freq") = 16000, py::arg("frame_shift_ms") = 10.0f,
|
||||
py::arg("frame_length_ms") = 25.0f, py::arg("dither") = 1.0f,
|
||||
py::arg("preemph_coeff") = 0.97f, py::arg("remove_dc_offset") = true,
|
||||
py::arg("window_type") = "povey",
|
||||
py::arg("round_to_power_of_two") = true,
|
||||
py::arg("blackman_coeff") = 0.42f, py::arg("snip_edges") = true,
|
||||
py::arg("max_feature_vectors") = -1)
|
||||
.def_readwrite("samp_freq", &PyClass::samp_freq)
|
||||
.def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
|
||||
.def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
|
||||
.def_readwrite("dither", &PyClass::dither)
|
||||
.def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
|
||||
.def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
|
||||
.def_readwrite("window_type", &PyClass::window_type)
|
||||
.def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
|
||||
.def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
|
||||
.def_readwrite("snip_edges", &PyClass::snip_edges)
|
||||
.def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors)
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static("from_dict",
|
||||
[](py::dict dict) -> PyClass {
|
||||
return FrameExtractionOptionsFromDict(dict);
|
||||
})
|
||||
#if 0
|
||||
.def_readwrite("allow_downsample",
|
||||
&FrameExtractionOptions::allow_downsample)
|
||||
.def_readwrite("allow_upsample", &FrameExtractionOptions::allow_upsample)
|
||||
.def_readwrite("max_feature_vectors",
|
||||
&FrameExtractionOptions::max_feature_vectors)
|
||||
&PyClass::allow_downsample)
|
||||
.def_readwrite("allow_upsample", &PyClass::allow_upsample)
|
||||
#endif
|
||||
.def("__str__", [](const FrameExtractionOptions &self) -> std::string {
|
||||
return self.ToString();
|
||||
});
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass {
|
||||
return FrameExtractionOptionsFromDict(dict);
|
||||
}));
|
||||
|
||||
m.def("num_frames", &NumFrames, py::arg("num_samples"), py::arg("opts"),
|
||||
py::arg("flush") = true);
|
||||
|
@ -11,6 +11,8 @@
|
||||
#include "kaldifeat/python/csrc/feature-spectrogram.h"
|
||||
#include "kaldifeat/python/csrc/feature-window.h"
|
||||
#include "kaldifeat/python/csrc/mel-computations.h"
|
||||
#include "kaldifeat/python/csrc/online-feature.h"
|
||||
#include "kaldifeat/python/csrc/whisper-fbank.h"
|
||||
#include "torch/torch.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
@ -21,9 +23,11 @@ PYBIND11_MODULE(_kaldifeat, m) {
|
||||
PybindFeatureWindow(m);
|
||||
PybindMelComputations(m);
|
||||
PybindFeatureFbank(m);
|
||||
PybindWhisperFbank(&m);
|
||||
PybindFeatureMfcc(m);
|
||||
PybindFeaturePlp(m);
|
||||
PybindFeatureSpectrogram(m);
|
||||
PybindOnlineFeature(m);
|
||||
}
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_
|
||||
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "torch/torch.h"
|
||||
namespace py = pybind11;
|
||||
|
||||
#endif // KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_
|
||||
|
@ -4,16 +4,35 @@
|
||||
|
||||
#include "kaldifeat/python/csrc/mel-computations.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/mel-computations.h"
|
||||
#include "kaldifeat/python/csrc/utils.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
static void PybindMelBanksOptions(py::module &m) {
|
||||
using PyClass = MelBanksOptions;
|
||||
py::class_<PyClass>(m, "MelBanksOptions")
|
||||
.def(py::init<>())
|
||||
.def(py::init(
         // Keyword constructor for MelBanksOptions. Every argument is
         // copied into the corresponding field of the new options object.
         [](int32_t num_bins = 25, float low_freq = 20, float high_freq = 0,
            float vtln_low = 100, float vtln_high = -500,
            bool debug_mel = false) -> std::unique_ptr<MelBanksOptions> {
           auto opts = std::make_unique<MelBanksOptions>();

           opts->num_bins = num_bins;
           opts->low_freq = low_freq;
           opts->high_freq = high_freq;
           opts->vtln_low = vtln_low;
           opts->vtln_high = vtln_high;
           // Previously this argument was accepted but never stored, so
           // MelBanksOptions(debug_mel=True) silently left the field at
           // its default value.
           opts->debug_mel = debug_mel;

           return opts;
         }),
     py::arg("num_bins") = 25, py::arg("low_freq") = 20,
     py::arg("high_freq") = 0, py::arg("vtln_low") = 100,
     py::arg("vtln_high") = -500, py::arg("debug_mel") = false)
|
||||
.def_readwrite("num_bins", &PyClass::num_bins)
|
||||
.def_readwrite("low_freq", &PyClass::low_freq)
|
||||
.def_readwrite("high_freq", &PyClass::high_freq)
|
||||
@ -22,7 +41,18 @@ static void PybindMelBanksOptions(py::module &m) {
|
||||
.def_readwrite("debug_mel", &PyClass::debug_mel)
|
||||
.def_readwrite("htk_mode", &PyClass::htk_mode)
|
||||
.def("__str__",
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); });
|
||||
[](const PyClass &self) -> std::string { return self.ToString(); })
|
||||
.def("as_dict",
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); })
|
||||
.def_static("from_dict",
|
||||
[](py::dict dict) -> PyClass {
|
||||
return MelBanksOptionsFromDict(dict);
|
||||
})
|
||||
.def(py::pickle(
|
||||
[](const PyClass &self) -> py::dict { return AsDict(self); },
|
||||
[](py::dict dict) -> PyClass {
|
||||
return MelBanksOptionsFromDict(dict);
|
||||
}));
|
||||
}
|
||||
|
||||
void PybindMelComputations(py::module &m) { PybindMelBanksOptions(m); }
|
||||
|
39
kaldifeat/python/csrc/online-feature.cc
Normal file
@ -0,0 +1,39 @@
|
||||
// kaldifeat/python/csrc/online-feature.cc
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
#include "kaldifeat/python/csrc/online-feature.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/online-feature.h"
|
||||
namespace kaldifeat {
|
||||
|
||||
template <typename C>
|
||||
void PybindOnlineFeatureTpl(py::module &m, const std::string &class_name,
|
||||
const std::string &class_help_doc = "") {
|
||||
using PyClass = OnlineGenericBaseFeature<C>;
|
||||
using Options = typename C::Options;
|
||||
py::class_<PyClass>(m, class_name.c_str(), class_help_doc.c_str())
|
||||
.def(py::init<const Options &>(), py::arg("opts"))
|
||||
.def_property_readonly("dim", &PyClass::Dim)
|
||||
.def_property_readonly("frame_shift_in_seconds",
|
||||
&PyClass::FrameShiftInSeconds)
|
||||
.def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
|
||||
.def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
|
||||
.def("get_frame", &PyClass::GetFrame, py::arg("frame"))
|
||||
.def("get_frames", &PyClass::GetFrames, py::arg("frames"),
|
||||
py::call_guard<py::gil_scoped_release>())
|
||||
.def("accept_waveform", &PyClass::AcceptWaveform,
|
||||
py::arg("sampling_rate"), py::arg("waveform"),
|
||||
py::call_guard<py::gil_scoped_release>())
|
||||
.def("input_finished", &PyClass::InputFinished);
|
||||
}
|
||||
|
||||
// Registers the online (streaming) feature extractors on module `m`.
// Each call instantiates PybindOnlineFeatureTpl for one feature computer
// and exposes it under the given Python class name.
void PybindOnlineFeature(py::module &m) {
  PybindOnlineFeatureTpl<Mfcc>(m, "OnlineMfcc");    // MFCC
  PybindOnlineFeatureTpl<Fbank>(m, "OnlineFbank");  // filter bank
  PybindOnlineFeatureTpl<Plp>(m, "OnlinePlp");      // PLP
}
|
||||
|
||||
} // namespace kaldifeat
|
16
kaldifeat/python/csrc/online-feature.h
Normal file
@ -0,0 +1,16 @@
|
||||
// kaldifeat/python/csrc/online-feature.h
|
||||
//
|
||||
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
#ifndef KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
|
||||
#define KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
|
||||
|
||||
#include "kaldifeat/python/csrc/kaldifeat.h"
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
void PybindOnlineFeature(py::module &m);
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
||||
#endif // KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
|
284
kaldifeat/python/csrc/utils.cc
Normal file
@ -0,0 +1,284 @@
|
||||
// kaldifeat/python/csrc/utils.cc
|
||||
//
|
||||
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
#include "kaldifeat/python/csrc/utils.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
|
||||
// Helper macros for the XxxOptionsFromDict / AsDict functions below.
// Both expect two names to be in scope at the expansion site:
//   - `dict`: the Python dict being read from or written to
//   - `opts`: the options struct being filled or serialized
//
// FROM_DICT(type, key): if `dict` contains the string key "key", convert
// the stored value with py::type(...) and assign it to opts.key; otherwise
// opts.key is left untouched.
//
// The body is wrapped in do { ... } while (0) so that `FROM_DICT(...);`
// is exactly one statement and stays well-formed when used as the branch
// of an unbraced if/else.
#define FROM_DICT(type, key)           \
  do {                                 \
    if (dict.contains(#key)) {         \
      opts.key = py::type(dict[#key]); \
    }                                  \
  } while (0)

// AS_DICT(key): store opts.key in `dict` under the string key "key".
#define AS_DICT(key) dict[#key] = opts.key
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
// Builds a FrameExtractionOptions from a Python dict.
//
// Starts from a default-constructed FrameExtractionOptions and overwrites
// only the fields whose key is present in `dict`; every other field keeps
// its default value.
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
  FrameExtractionOptions parsed;

  if (dict.contains("samp_freq")) {
    parsed.samp_freq = py::float_(dict["samp_freq"]);
  }
  if (dict.contains("frame_shift_ms")) {
    parsed.frame_shift_ms = py::float_(dict["frame_shift_ms"]);
  }
  if (dict.contains("frame_length_ms")) {
    parsed.frame_length_ms = py::float_(dict["frame_length_ms"]);
  }
  if (dict.contains("dither")) {
    parsed.dither = py::float_(dict["dither"]);
  }
  if (dict.contains("preemph_coeff")) {
    parsed.preemph_coeff = py::float_(dict["preemph_coeff"]);
  }
  if (dict.contains("remove_dc_offset")) {
    parsed.remove_dc_offset = py::bool_(dict["remove_dc_offset"]);
  }
  if (dict.contains("window_type")) {
    parsed.window_type = py::str(dict["window_type"]);
  }
  if (dict.contains("round_to_power_of_two")) {
    parsed.round_to_power_of_two = py::bool_(dict["round_to_power_of_two"]);
  }
  if (dict.contains("blackman_coeff")) {
    parsed.blackman_coeff = py::float_(dict["blackman_coeff"]);
  }
  if (dict.contains("snip_edges")) {
    parsed.snip_edges = py::bool_(dict["snip_edges"]);
  }
  if (dict.contains("max_feature_vectors")) {
    parsed.max_feature_vectors = py::int_(dict["max_feature_vectors"]);
  }

  return parsed;
}
|
||||
|
||||
// Serializes a FrameExtractionOptions into a Python dict, one entry per
// field, keyed by the field name.
py::dict AsDict(const FrameExtractionOptions &opts) {
  py::dict out;

  out["samp_freq"] = opts.samp_freq;
  out["frame_shift_ms"] = opts.frame_shift_ms;
  out["frame_length_ms"] = opts.frame_length_ms;
  out["dither"] = opts.dither;
  out["preemph_coeff"] = opts.preemph_coeff;
  out["remove_dc_offset"] = opts.remove_dc_offset;
  out["window_type"] = opts.window_type;
  out["round_to_power_of_two"] = opts.round_to_power_of_two;
  out["blackman_coeff"] = opts.blackman_coeff;
  out["snip_edges"] = opts.snip_edges;
  out["max_feature_vectors"] = opts.max_feature_vectors;

  return out;
}
|
||||
|
||||
// Builds a MelBanksOptions from a Python dict.
//
// Only keys present in `dict` override the corresponding field of a
// default-constructed MelBanksOptions.
MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
  MelBanksOptions parsed;

  if (dict.contains("num_bins")) {
    parsed.num_bins = py::int_(dict["num_bins"]);
  }
  if (dict.contains("low_freq")) {
    parsed.low_freq = py::float_(dict["low_freq"]);
  }
  if (dict.contains("high_freq")) {
    parsed.high_freq = py::float_(dict["high_freq"]);
  }
  if (dict.contains("vtln_low")) {
    parsed.vtln_low = py::float_(dict["vtln_low"]);
  }
  if (dict.contains("vtln_high")) {
    parsed.vtln_high = py::float_(dict["vtln_high"]);
  }
  if (dict.contains("debug_mel")) {
    parsed.debug_mel = py::bool_(dict["debug_mel"]);
  }
  if (dict.contains("htk_mode")) {
    parsed.htk_mode = py::bool_(dict["htk_mode"]);
  }

  return parsed;
}
|
||||
// Serializes a MelBanksOptions into a Python dict, one entry per field,
// keyed by the field name.
py::dict AsDict(const MelBanksOptions &opts) {
  py::dict out;

  out["num_bins"] = opts.num_bins;
  out["low_freq"] = opts.low_freq;
  out["high_freq"] = opts.high_freq;
  out["vtln_low"] = opts.vtln_low;
  out["vtln_high"] = opts.vtln_high;
  out["debug_mel"] = opts.debug_mel;
  out["htk_mode"] = opts.htk_mode;

  return out;
}
|
||||
|
||||
// Builds a FbankOptions from a Python dict.
//
// Recognized keys:
//   - "frame_opts" / "mel_opts": nested dicts, parsed by
//     FrameExtractionOptionsFromDict / MelBanksOptionsFromDict.
//   - scalar fields, keyed by field name.
//   - "device": converted with str() and passed to torch::Device.
// Any key that is absent leaves the default value in place.
FbankOptions FbankOptionsFromDict(py::dict dict) {
  FbankOptions parsed;

  // Nested option groups.
  if (dict.contains("frame_opts")) {
    parsed.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  if (dict.contains("mel_opts")) {
    parsed.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
  }

  // Scalar fields.
  if (dict.contains("use_energy")) {
    parsed.use_energy = py::bool_(dict["use_energy"]);
  }
  if (dict.contains("energy_floor")) {
    parsed.energy_floor = py::float_(dict["energy_floor"]);
  }
  if (dict.contains("raw_energy")) {
    parsed.raw_energy = py::bool_(dict["raw_energy"]);
  }
  if (dict.contains("htk_compat")) {
    parsed.htk_compat = py::bool_(dict["htk_compat"]);
  }
  if (dict.contains("use_log_fbank")) {
    parsed.use_log_fbank = py::bool_(dict["use_log_fbank"]);
  }
  if (dict.contains("use_power")) {
    parsed.use_power = py::bool_(dict["use_power"]);
  }

  // Device, via its string representation.
  if (dict.contains("device")) {
    const auto device_name = std::string(py::str(dict["device"]));
    parsed.device = torch::Device(device_name);
  }

  return parsed;
}
|
||||
|
||||
// Serializes a FbankOptions into a Python dict.
//
// "frame_opts" and "mel_opts" are stored as nested dicts; the device is
// stored as a torch.device object built from opts.device.str().
py::dict AsDict(const FbankOptions &opts) {
  py::dict out;

  out["frame_opts"] = AsDict(opts.frame_opts);
  out["mel_opts"] = AsDict(opts.mel_opts);

  out["use_energy"] = opts.use_energy;
  out["energy_floor"] = opts.energy_floor;
  out["raw_energy"] = opts.raw_energy;
  out["htk_compat"] = opts.htk_compat;
  out["use_log_fbank"] = opts.use_log_fbank;
  out["use_power"] = opts.use_power;

  py::object make_device = py::module_::import("torch").attr("device");
  out["device"] = make_device(opts.device.str());

  return out;
}
|
||||
|
||||
// Builds a WhisperFbankOptions from a Python dict.
//
// Recognized keys: "frame_opts" (nested dict), "num_mels" and "device".
// Keys that are absent leave the corresponding default untouched.
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict) {
  WhisperFbankOptions parsed;

  if (dict.contains("frame_opts")) {
    parsed.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }

  if (dict.contains("num_mels")) {
    parsed.num_mels = py::int_(dict["num_mels"]);
  }

  // Device, via its string representation.
  if (dict.contains("device")) {
    const auto device_name = std::string(py::str(dict["device"]));
    parsed.device = torch::Device(device_name);
  }

  return parsed;
}
|
||||
|
||||
// Serializes a WhisperFbankOptions into a Python dict.
//
// "frame_opts" is stored as a nested dict; the device is stored as a
// torch.device object built from opts.device.str().
py::dict AsDict(const WhisperFbankOptions &opts) {
  py::dict out;

  out["frame_opts"] = AsDict(opts.frame_opts);
  out["num_mels"] = opts.num_mels;

  py::object make_device = py::module_::import("torch").attr("device");
  out["device"] = make_device(opts.device.str());

  return out;
}
|
||||
|
||||
// Builds a MfccOptions from a Python dict.
//
// Recognized keys:
//   - "frame_opts" / "mel_opts": nested dicts, parsed by
//     FrameExtractionOptionsFromDict / MelBanksOptionsFromDict.
//   - scalar fields, keyed by field name.
//   - "device": converted with str() and passed to torch::Device.
// Any key that is absent leaves the default value in place.
MfccOptions MfccOptionsFromDict(py::dict dict) {
  MfccOptions parsed;

  // Nested option groups.
  if (dict.contains("frame_opts")) {
    parsed.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  if (dict.contains("mel_opts")) {
    parsed.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
  }

  // Scalar fields.
  if (dict.contains("num_ceps")) {
    parsed.num_ceps = py::int_(dict["num_ceps"]);
  }
  if (dict.contains("use_energy")) {
    parsed.use_energy = py::bool_(dict["use_energy"]);
  }
  if (dict.contains("energy_floor")) {
    parsed.energy_floor = py::float_(dict["energy_floor"]);
  }
  if (dict.contains("raw_energy")) {
    parsed.raw_energy = py::bool_(dict["raw_energy"]);
  }
  if (dict.contains("cepstral_lifter")) {
    parsed.cepstral_lifter = py::float_(dict["cepstral_lifter"]);
  }
  if (dict.contains("htk_compat")) {
    parsed.htk_compat = py::bool_(dict["htk_compat"]);
  }

  // Device, via its string representation.
  if (dict.contains("device")) {
    const auto device_name = std::string(py::str(dict["device"]));
    parsed.device = torch::Device(device_name);
  }

  return parsed;
}
|
||||
|
||||
// Serializes a MfccOptions into a Python dict.
//
// "frame_opts" and "mel_opts" are stored as nested dicts; the device is
// stored as a torch.device object built from opts.device.str().
py::dict AsDict(const MfccOptions &opts) {
  py::dict out;

  out["frame_opts"] = AsDict(opts.frame_opts);
  out["mel_opts"] = AsDict(opts.mel_opts);

  out["num_ceps"] = opts.num_ceps;
  out["use_energy"] = opts.use_energy;
  out["energy_floor"] = opts.energy_floor;
  out["raw_energy"] = opts.raw_energy;
  out["cepstral_lifter"] = opts.cepstral_lifter;
  out["htk_compat"] = opts.htk_compat;

  py::object make_device = py::module_::import("torch").attr("device");
  out["device"] = make_device(opts.device.str());

  return out;
}
|
||||
|
||||
// Builds a SpectrogramOptions from a Python dict.
//
// Recognized keys: "frame_opts" (nested dict), "energy_floor",
// "raw_energy" and "device". Keys that are absent leave the
// corresponding default untouched.
SpectrogramOptions SpectrogramOptionsFromDict(py::dict dict) {
  SpectrogramOptions parsed;

  if (dict.contains("frame_opts")) {
    parsed.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }

  if (dict.contains("energy_floor")) {
    parsed.energy_floor = py::float_(dict["energy_floor"]);
  }
  if (dict.contains("raw_energy")) {
    parsed.raw_energy = py::bool_(dict["raw_energy"]);
  }
  // NOTE: "return_raw_fft" is deliberately not read here; the
  // corresponding line was disabled in the original code.

  // Device, via its string representation.
  if (dict.contains("device")) {
    const auto device_name = std::string(py::str(dict["device"]));
    parsed.device = torch::Device(device_name);
  }

  return parsed;
}
|
||||
|
||||
// Serializes a SpectrogramOptions into a Python dict.
//
// "frame_opts" is stored as a nested dict; the device is stored as a
// torch.device object built from opts.device.str().
py::dict AsDict(const SpectrogramOptions &opts) {
  py::dict out;

  out["frame_opts"] = AsDict(opts.frame_opts);

  out["energy_floor"] = opts.energy_floor;
  out["raw_energy"] = opts.raw_energy;

  py::object make_device = py::module_::import("torch").attr("device");
  out["device"] = make_device(opts.device.str());

  return out;
}
|
||||
|
||||
// Builds a PlpOptions from a Python dict.
//
// Recognized keys:
//   - "frame_opts" / "mel_opts": nested dicts, parsed by
//     FrameExtractionOptionsFromDict / MelBanksOptionsFromDict.
//   - scalar fields, keyed by field name.
//   - "device": converted with str() and passed to torch::Device.
// Any key that is absent leaves the default value in place.
PlpOptions PlpOptionsFromDict(py::dict dict) {
  PlpOptions parsed;

  // Nested option groups.
  if (dict.contains("frame_opts")) {
    parsed.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  if (dict.contains("mel_opts")) {
    parsed.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
  }

  // Scalar fields.
  if (dict.contains("lpc_order")) {
    parsed.lpc_order = py::int_(dict["lpc_order"]);
  }
  if (dict.contains("num_ceps")) {
    parsed.num_ceps = py::int_(dict["num_ceps"]);
  }
  if (dict.contains("use_energy")) {
    parsed.use_energy = py::bool_(dict["use_energy"]);
  }
  if (dict.contains("energy_floor")) {
    parsed.energy_floor = py::float_(dict["energy_floor"]);
  }
  if (dict.contains("raw_energy")) {
    parsed.raw_energy = py::bool_(dict["raw_energy"]);
  }
  if (dict.contains("compress_factor")) {
    parsed.compress_factor = py::float_(dict["compress_factor"]);
  }
  if (dict.contains("cepstral_lifter")) {
    parsed.cepstral_lifter = py::int_(dict["cepstral_lifter"]);
  }
  if (dict.contains("cepstral_scale")) {
    parsed.cepstral_scale = py::float_(dict["cepstral_scale"]);
  }
  if (dict.contains("htk_compat")) {
    parsed.htk_compat = py::bool_(dict["htk_compat"]);
  }

  // Device, via its string representation.
  if (dict.contains("device")) {
    const auto device_name = std::string(py::str(dict["device"]));
    parsed.device = torch::Device(device_name);
  }

  return parsed;
}
|
||||
|
||||
// Serializes a PlpOptions into a Python dict.
//
// "frame_opts" and "mel_opts" are stored as nested dicts; the device is
// stored as a torch.device object built from opts.device.str().
py::dict AsDict(const PlpOptions &opts) {
  py::dict out;

  out["frame_opts"] = AsDict(opts.frame_opts);
  out["mel_opts"] = AsDict(opts.mel_opts);

  out["lpc_order"] = opts.lpc_order;
  out["num_ceps"] = opts.num_ceps;
  out["use_energy"] = opts.use_energy;
  out["energy_floor"] = opts.energy_floor;
  out["raw_energy"] = opts.raw_energy;
  out["compress_factor"] = opts.compress_factor;
  out["cepstral_lifter"] = opts.cepstral_lifter;
  out["cepstral_scale"] = opts.cepstral_scale;
  out["htk_compat"] = opts.htk_compat;

  py::object make_device = py::module_::import("torch").attr("device");
  out["device"] = make_device(opts.device.str());

  return out;
}
|
||||
|
||||
#undef FROM_DICT
|
||||
#undef AS_DICT
|
||||
|
||||
} // namespace kaldifeat
|
54
kaldifeat/python/csrc/utils.h
Normal file
@ -0,0 +1,54 @@
|
||||
// kaldifeat/python/csrc/utils.h
|
||||
//
|
||||
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
#ifndef KALDIFEAT_PYTHON_CSRC_UTILS_H_
|
||||
#define KALDIFEAT_PYTHON_CSRC_UTILS_H_
|
||||
|
||||
#include "kaldifeat/csrc/feature-fbank.h"
|
||||
#include "kaldifeat/csrc/feature-mfcc.h"
|
||||
#include "kaldifeat/csrc/feature-plp.h"
|
||||
#include "kaldifeat/csrc/feature-spectrogram.h"
|
||||
#include "kaldifeat/csrc/feature-window.h"
|
||||
#include "kaldifeat/csrc/mel-computations.h"
|
||||
#include "kaldifeat/csrc/whisper-fbank.h"
|
||||
#include "kaldifeat/python/csrc/kaldifeat.h"
|
||||
|
||||
/*
|
||||
* This file contains code about `from_dict` and
|
||||
* `as_dict` for various options in kaldifeat.
|
||||
*
|
||||
* Regarding `from_dict`, users don't need to provide
|
||||
* all the fields in the options. Any field that is
|
||||
* not provided keeps its default value.
|
||||
*
|
||||
* If the provided dict in `from_dict` is empty,
|
||||
* all fields use their default values.
|
||||
*/
|
||||
|
||||
namespace kaldifeat {
|
||||
|
||||
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const FrameExtractionOptions &opts);
|
||||
|
||||
MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const MelBanksOptions &opts);
|
||||
|
||||
FbankOptions FbankOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const FbankOptions &opts);
|
||||
|
||||
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const WhisperFbankOptions &opts);
|
||||
|
||||
MfccOptions MfccOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const MfccOptions &opts);
|
||||
|
||||
SpectrogramOptions SpectrogramOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const SpectrogramOptions &opts);
|
||||
|
||||
PlpOptions PlpOptionsFromDict(py::dict dict);
|
||||
py::dict AsDict(const PlpOptions &opts);
|
||||
|
||||
} // namespace kaldifeat
|
||||
|
||||
#endif // KALDIFEAT_PYTHON_CSRC_UTILS_H_
|