Compare commits


No commits in common. "master" and "v1.3" have entirely different histories.
master ... v1.3

137 changed files with 932 additions and 13727 deletions

View File

@@ -3,10 +3,8 @@ max-line-length = 80
exclude =
.git,
doc,
build,
build_release,
cmake/cmake_extension.py,
kaldifeat/python/kaldifeat/__init__.py
ignore =

View File

@@ -1,81 +0,0 @@
# Copyright 2022 Xiaomi Corp. (author: Fangjun Kuang)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# refer to https://github.com/actions/starter-workflows/pull/47/files
# You can access it at https://csukuangfj.github.io/kaldifeat
name: Generate doc
on:
push:
branches:
- master
- doc
workflow_dispatch:
jobs:
build-doc:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: [3.8]
steps:
# refer to https://github.com/actions/checkout
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Update wheels
shell: bash
run: |
export KALDIFEAT_DIR=$PWD
ls -lh $KALDIFEAT_DIR
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
cd huggingface
./run.sh
- name: Build doc
shell: bash
run: |
cd doc
git status
python3 -m pip install -r ./requirements.txt
make html
cp source/cpu.html build/html/
cp source/cuda.html build/html/
cp source/cpu-cn.html build/html/
cp source/cuda-cn.html build/html/
touch build/html/.nojekyll
- name: Deploy
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./doc/build/html
publish_branch: gh-pages

View File

@@ -1,121 +0,0 @@
name: build-wheels-cpu-macos
on:
push:
branches:
# - wheel
- torch-2.8.0
tags:
- '*'
workflow_dispatch:
concurrency:
group: build-wheels-cpu-macos-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
# python ./scripts/github_actions/generate_build_matrix.py --for-macos
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos)
python ./scripts/github_actions/generate_build_matrix.py --for-macos --test-only-latest-torch
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
build_wheels_macos_cpu:
needs: generate_build_matrix
name: ${{ matrix.torch }} ${{ matrix.python-version }}
runs-on: macos-14
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
shell: bash
run: |
pip install -q torch==${{ matrix.torch}} cmake numpy wheel>=0.40.0 twine setuptools
- name: Build wheel
shell: bash
run: |
python3 setup.py bdist_wheel
mkdir wheelhouse
cp -v dist/* wheelhouse
- name: Display wheels (before fix)
shell: bash
run: |
ls -lh ./wheelhouse/
- name: Fix wheel platform tag
run: |
# See https://github.com/glencoesoftware/zeroc-ice-py-macos-x86_64/pull/3/files
# See:
# * https://github.com/pypa/wheel/issues/406
python -m wheel tags \
--platform-tag=macosx_11_0_arm64 \
--remove wheelhouse/*.whl
- name: Display wheels (after fix)
shell: bash
run: |
ls -lh ./wheelhouse/
- name: Upload Wheel
uses: actions/upload-artifact@v4
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-macos-latest-cpu
path: wheelhouse/*.whl
# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
if: github.repository_owner == 'csukuangfj'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v2
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
cd huggingface
git pull
d=cpu/1.25.5.dev20241029/macos
mkdir -p $d
cp -v ../wheelhouse/*.whl ./$d
git status
git lfs track "*.whl"
git add .
git commit -m "upload macos wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main

View File

@@ -20,37 +20,88 @@ on:
push:
tags:
- '*'
workflow_dispatch:
jobs:
pypi:
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-18.04]
cuda: ["10.1"]
gcc: ["5"]
torch: ["1.8.1"]
python-version: [3.6, 3.7, 3.8]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: ${{ matrix.python-version }}
- name: Install Python dependencies
- name: Install CUDA Toolkit ${{ matrix.cuda }}
shell: bash
env:
cuda: ${{ matrix.cuda }}
run: |
source ./scripts/github_actions/install_cuda.sh
echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV
echo "${CUDA_HOME}/bin" >> $GITHUB_PATH
echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
- name: Display NVCC version
run: |
which nvcc
nvcc --version
- name: Install GCC ${{ matrix.gcc }}
run: |
sudo apt-get install -y gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }}
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> $GITHUB_ENV
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV
- name: Install PyTorch ${{ matrix.torch }}
env:
cuda: ${{ matrix.cuda }}
torch: ${{ matrix.torch }}
shell: bash
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel twine setuptools
python3 -m pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
python3 -m pip install wheel twine typing_extensions
python3 -m pip install bs4 requests tqdm
- name: Build
shell: bash
./scripts/github_actions/install_torch.sh
python3 -c "import torch; print('torch version:', torch.__version__)"
- name: Download cudnn 8.0
env:
cuda: ${{ matrix.cuda }}
run: |
python3 setup.py sdist
ls -l dist/*
./scripts/github_actions/install_cudnn.sh
- name: Build pip packages
shell: bash
env:
KALDIFEAT_IS_FOR_PYPI: 1
run: |
tag=$(python3 -c "import sys; print(''.join(sys.version[:3].split('.')))")
export KALDIFEAT_MAKE_ARGS="-j2"
python3 setup.py bdist_wheel --python-tag=py${tag}
ls -lh dist/
- name: Publish wheels to PyPI
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/kaldifeat-*.tar.gz
twine upload dist/kaldifeat-*.whl
- name: Upload Wheel
uses: actions/upload-artifact@v2
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-${{ matrix.os }}
path: dist/*.whl

View File

@@ -1,85 +0,0 @@
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: Run tests macos cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
run_tests_macos_cpu:
needs: generate_build_matrix
runs-on: macos-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install PyTorch ${{ matrix.torch }}
shell: bash
run: |
python3 -m pip install -qq --upgrade pip
python3 -m pip install -qq wheel twine typing_extensions soundfile numpy
python3 -m pip install -qq torch==${{ matrix.torch }} -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }} -f https://download.pytorch.org/whl/torch/
python3 -c "import torch; print('torch version:', torch.__version__)"
- name: Build
shell: bash
run: |
mkdir build_release
cd build_release
cmake -DCMAKE_CXX_STANDARD=17 ..
make VERBOSE=1 -j3
- name: Run tests
shell: bash
run: |
cd build_release
ctest --output-on-failure

View File

@@ -1,88 +0,0 @@
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: Run tests ubuntu cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
run_tests_ubuntu_cpu:
needs: generate_build_matrix
runs-on: ubuntu-18.04
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install PyTorch ${{ matrix.torch }}
shell: bash
run: |
sudo apt-get update
sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg
python3 -m pip install --upgrade pip
python3 -m pip install wheel twine typing_extensions soundfile
python3 -m pip install bs4 requests tqdm numpy
python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/
python3 -c "import torch; print('torch version:', torch.__version__)"
- name: Build
shell: bash
run: |
mkdir build_release
cd build_release
cmake -DCMAKE_CXX_STANDARD=17 ..
make VERBOSE=1 -j3
- name: Run tests
shell: bash
run: |
cd build_release
ctest --output-on-failure

View File

@@ -1,112 +0,0 @@
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: Run tests ubuntu cuda
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
run_tests_ubuntu_cuda:
needs: generate_build_matrix
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install CUDA Toolkit ${{ matrix.cuda }}
shell: bash
env:
cuda: ${{ matrix.cuda }}
run: |
source ./scripts/github_actions/install_cuda.sh
echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV
echo "${CUDA_HOME}/bin" >> $GITHUB_PATH
echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
- name: Display NVCC version
run: |
which nvcc
nvcc --version
- name: Install PyTorch ${{ matrix.torch }}
env:
cuda: ${{ matrix.cuda }}
torch: ${{ matrix.torch }}
shell: bash
run: |
sudo apt-get update
sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg
python3 -m pip install --upgrade pip
python3 -m pip install wheel twine typing_extensions soundfile
python3 -m pip install bs4 requests tqdm numpy
./scripts/github_actions/install_torch.sh
python3 -c "import torch; print('torch version:', torch.__version__)"
- name: Download cudnn 8.0
env:
cuda: ${{ matrix.cuda }}
run: |
./scripts/github_actions/install_cudnn.sh
- name: Build
shell: bash
run: |
mkdir build_release
cd build_release
cmake -DCMAKE_CXX_STANDARD=17 ..
make VERBOSE=1 -j3
- name: Run tests
shell: bash
run: |
cd build_release
ctest --output-on-failure

View File

@@ -1,121 +0,0 @@
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: Run tests windows cpu
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
run_tests_windows_cpu:
# see https://github.com/actions/virtual-environments/blob/win19/20210525.0/images/win/Windows2019-Readme.md
needs: generate_build_matrix
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# see https://github.com/microsoft/setup-msbuild
- name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v1.0.2
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Install PyTorch ${{ matrix.torch }}
run: |
pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/
pip3 install -qq wheel twine dataclasses numpy typing_extensions soundfile
- name: Display CMake version
run: |
cmake --version
cmake --help
- name: Configure CMake
shell: bash
run: |
mkdir build_release
cd build_release
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
ls -lh
- name: Build kaldifeat
run: |
cd build_release
cmake --build -DCMAKE_CXX_STANDARD=17 . --target _kaldifeat --config Release
- name: Display generated files
shell: bash
run: |
cd build_release
ls -lh lib/*/*
- name: Build wheel
shell: bash
run: |
python3 setup.py bdist_wheel
ls -lh dist/
pip install ./dist/*.whl
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
- name: Upload Wheel
uses: actions/upload-artifact@v4
with:
name: python-${{ matrix.python-version }}-${{ matrix.os }}-cpu
path: dist/*.whl
- name: Build tests
shell: bash
run: |
cd build_release
cmake -DCMAKE_CXX_STANDARD=17 --build . --target ALL_BUILD --config Release
ls -lh bin/*/*
ctest -C Release --verbose --output-on-failure

View File

@@ -1,173 +0,0 @@
# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Run tests windows cuda
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python scripts/github_actions/generate_build_matrix.py --enable-cuda --for-windows --test-only-latest-torch
MATRIX=$(python scripts/github_actions/generate_build_matrix.py --enable-cuda --for-windows --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
run_tests_windows_cuda:
needs: generate_build_matrix
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# see https://github.com/microsoft/setup-msbuild
- name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v1.0.2
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Display Python version
run: python -c "import sys; print(sys.version)"
# See https://github.com/Jimver/cuda-toolkit/blob/master/src/links/windows-links.ts
# for available CUDA versions
- uses: Jimver/cuda-toolkit@v0.2.7
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda }}
- name: Display CUDA version
shell: bash
run: |
echo "Installed cuda version is: ${{ steps.cuda-toolkit.outputs.cuda }}"
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
nvcc --version
- name: Remove CUDA installation package
shell: bash
run: |
rm "C:/hostedtoolcache/windows/cuda_installer-windows/${{ matrix.cuda }}/x64/cuda_installer_${{ matrix.cuda }}.exe"
- name: Download cuDNN
shell: bash
run: |
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/cudnn-for-windows
cd cudnn-for-windows
git lfs pull --include="cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip"
unzip cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip
rm cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip
ls -lh *
ls -lh */*
echo "PWD: $PWD"
- name: Install PyTorch ${{ matrix.torch }}
shell: bash
run: |
version=${{ matrix.cuda }}
major=${version:0:2}
minor=${version:3:1}
v=${major}${minor}
if [ ${v} -eq 102 ]; then v=""; else v="+cu${v}"; fi
python3 -m pip install -qq --upgrade pip
python3 -m pip install -qq wheel twine numpy typing_extensions
python3 -m pip install -qq dataclasses soundfile numpy
python3 -m pip install -qq torch==${{ matrix.torch }}${v} -f https://download.pytorch.org/whl/torch_stable.html numpy || python3 -m pip install -qq torch==${{ matrix.torch }}${v} -f https://download.pytorch.org/whl/torch/ numpy
python3 -c "import torch; print('torch version:', torch.__version__)"
python3 -m torch.utils.collect_env
- name: Display CMake version
run: |
cmake --version
cmake --help
- name: Configure CMake
shell: bash
run: |
echo "PWD: $PWD"
ls -lh
mkdir build_release
cd build_release
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCUDNN_INCLUDE_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/include -DCUDNN_LIBRARY_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/lib/cudnn.lib ..
ls -lh
- name: Build kaldifeat
shell: bash
run: |
cd build_release
cmake --build . --target _kaldifeat --config Release
- name: Display generated files
shell: bash
run: |
cd build_release
ls -lh lib/*/*
- name: Build wheel
shell: bash
run: |
echo $PWD
ls -lh ./*
export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCUDNN_INCLUDE_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/include -DCUDNN_LIBRARY_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/lib/cudnn.lib"
python3 setup.py bdist_wheel
ls -lh dist/
pip install ./dist/*.whl
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
- name: Upload Wheel
uses: actions/upload-artifact@v4
with:
name: python-${{ matrix.python-version }}-${{ matrix.os }}-cuda-${{ matrix.cuda }}
path: dist/*.whl
- name: Build tests
shell: bash
run: |
cd build_release
cmake -DCMAKE_CXX_STANDARD=17 --build . --target ALL_BUILD --config Release
ls -lh bin/*/*
ctest -C Release --verbose --output-on-failure

View File

@@ -1,64 +0,0 @@
# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: style_check
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
style_check:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
python-version: ["3.8"]
fail-fast: false
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install Python dependencies
run: |
python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2 click==8.0.4
# See https://github.com/psf/black/issues/2964
# The version of click should be selected from 8.0.0, 8.0.1, 8.0.2, 8.0.3, and 8.0.4
- name: Run flake8
shell: bash
working-directory: ${{github.workspace}}
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --show-source --statistics
flake8 .
- name: Run black
shell: bash
working-directory: ${{github.workspace}}
run: |
black --check --diff .

View File

@@ -1,67 +0,0 @@
name: Test pre-compiled wheels
on:
workflow_dispatch:
inputs:
torch_version:
description: "torch version, e.g., 2.0.1"
required: true
kaldifeat_version:
description: "kaldifeat version, e.g., 1.25.0.dev20230726"
required: true
jobs:
Test_pre_compiled_wheels:
name: ${{ matrix.os }} ${{ github.event.inputs.torch_version }} ${{ github.event.inputs.kaldifeat_version }} ${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ["3.8", "3.9", "3.10"]
steps:
# refer to https://github.com/actions/checkout
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Install dependencies
shell: bash
run: |
pip install numpy
- name: Install torch
if: startsWith(matrix.os, 'macos')
shell: bash
run: |
pip install torch==${{ github.event.inputs.torch_version }}
- name: Install torch
if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'windows')
shell: bash
run: |
pip install torch==${{ github.event.inputs.torch_version }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || pip install torch==${{ github.event.inputs.torch_version }}+cpu -f https://download.pytorch.org/whl/torch/
- name: Install kaldifeat
shell: bash
run: |
pip install kaldifeat==${{ github.event.inputs.kaldifeat_version }}+cpu.torch${{ github.event.inputs.torch_version }} -f https://csukuangfj.github.io/kaldifeat/cpu.html
- name: Run tests
shell: bash
run: |
cd kaldifeat/python/tests
python3 -c "import kaldifeat; print(kaldifeat.__file__)"
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
python3 ./test_fbank_options.py
python3 ./test_mfcc_options.py
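For context, `test_fbank_options.py` and `test_mfcc_options.py` exercise the option classes that appear elsewhere in this diff (`kaldifeat.FbankOptions`, `kaldifeat.MfccOptions`). Below is a minimal sketch of how such options are typically configured before extraction; the nested attribute names (`frame_opts`, `mel_opts`, `num_bins`, `dither`) are assumptions modeled on Kaldi's feature options and are not taken from this diff.

```python
# Hedged sketch: configuring kaldifeat options before feature extraction.
# The nested fields (frame_opts, mel_opts, ...) are assumed to mirror
# Kaldi's FrameExtractionOptions / MelBanksOptions; verify against the
# tests run above before relying on them.
import torch
import kaldifeat

opts = kaldifeat.FbankOptions()
opts.device = torch.device("cpu")   # or torch.device("cuda", 0)
opts.frame_opts.dither = 0          # assumed field: disable dithering
opts.mel_opts.num_bins = 80         # assumed field: 80 mel bins

fbank = kaldifeat.Fbank(opts)
wave = torch.rand(16000)            # one second of fake 16 kHz audio
features = fbank(wave)              # 2-D tensor: (num_frames, num_bins)
print(features.shape)
```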

View File

@@ -1,168 +0,0 @@
name: build-wheels-cpu-arm64-ubuntu
on:
push:
branches:
# - wheel
- torch-2.8.0
tags:
- '*'
workflow_dispatch:
concurrency:
group: build-wheels-cpu-arm64-ubuntu-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
# python ./scripts/github_actions/generate_build_matrix.py --for-arm64
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-arm64)
python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch --for-arm64
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch --for-arm64)
echo "::set-output name=matrix::${MATRIX}"
build-manylinux-wheels:
needs: generate_build_matrix
name: ${{ matrix.torch }} ${{ matrix.python-version }}
runs-on: ubuntu-22.04-arm
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
# https://github.com/pytorch/builder/tree/main/manywheel
# https://github.com/pytorch/builder/pull/476
# https://github.com/k2-fsa/k2/issues/733
# https://github.com/pytorch/pytorch/pull/50633 (generate build matrix)
- name: Run the build process with Docker
uses: addnab/docker-run-action@v3
with:
image: ${{ matrix.image }}
options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }}
run: |
echo "pwd: $PWD"
uname -a
id
cat /etc/*release
gcc --version
python3 --version
which python3
ls -lh /opt/python/
echo "---"
ls -lh /opt/python/cp*
ls -lh /opt/python/*/bin
echo "---"
find /opt/python/cp* -name "libpython*"
echo "-----"
find /opt/_internal/cp* -name "libpython*"
echo "-----"
find / -name "libpython*"
echo "----"
ls -lh /usr/lib64/libpython3.so
# cp36-cp36m
# cp37-cp37m
# cp38-cp38
# cp39-cp39
# cp310-cp310
# cp311-cp311
# cp312-cp312
# cp313-cp313
# cp313-cp313t (no gil)
if [[ $PYTHON_VERSION == "3.6" ]]; then
python_dir=/opt/python/cp36-cp36m
export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.7" ]]; then
python_dir=/opt/python/cp37-cp37m
export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.8" ]]; then
python_dir=/opt/python/cp38-cp38
export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.9" ]]; then
python_dir=/opt/python/cp39-cp39
export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.10" ]]; then
python_dir=/opt/python/cp310-cp310
export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.11" ]]; then
python_dir=/opt/python/cp311-cp311
export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.12" ]]; then
python_dir=/opt/python/cp312-cp312
export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.13" ]]; then
python_dir=/opt/python/cp313-cp313
export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
else
echo "Unsupported Python version $PYTHON_VERSION"
exit 1
fi
export PYTHON_INSTALL_DIR=$python_dir
export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
python3 --version
which python3
/var/www/scripts/github_actions/build-ubuntu-cpu-arm64.sh
- name: Display wheels
shell: bash
run: |
ls -lh ./wheelhouse/
# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
if: github.repository_owner == 'csukuangfj'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v2
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
cd huggingface
git pull
d=cpu/1.25.5.dev20250307/linux-arm64
mkdir -p $d
cp -v ../wheelhouse/*.whl ./$d
git status
git lfs track "*.whl"
git add .
git commit -m "upload ubuntu-arm64-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main

View File

@@ -1,168 +0,0 @@
name: build-wheels-cpu-ubuntu
on:
push:
branches:
# - wheel
- torch-2.8.0
tags:
- '*'
workflow_dispatch:
concurrency:
group: build-wheels-cpu-ubuntu-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
# python ./scripts/github_actions/generate_build_matrix.py
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py)
python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
build-manylinux-wheels:
needs: generate_build_matrix
name: ${{ matrix.torch }} ${{ matrix.python-version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
# https://github.com/pytorch/builder/tree/main/manywheel
# https://github.com/pytorch/builder/pull/476
# https://github.com/k2-fsa/k2/issues/733
# https://github.com/pytorch/pytorch/pull/50633 (generate build matrix)
- name: Run the build process with Docker
uses: addnab/docker-run-action@v3
with:
image: ${{ matrix.image }}
options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }}
run: |
echo "pwd: $PWD"
uname -a
id
cat /etc/*release
gcc --version
python3 --version
which python3
ls -lh /opt/python/
echo "---"
ls -lh /opt/python/cp*
ls -lh /opt/python/*/bin
echo "---"
find /opt/python/cp* -name "libpython*"
echo "-----"
find /opt/_internal/cp* -name "libpython*"
echo "-----"
find / -name "libpython*"
echo "----"
ls -lh /usr/lib64/libpython3.so || true
# cp36-cp36m
# cp37-cp37m
# cp38-cp38
# cp39-cp39
# cp310-cp310
# cp311-cp311
# cp312-cp312
# cp313-cp313
# cp313-cp313t (no gil)
if [[ $PYTHON_VERSION == "3.6" ]]; then
python_dir=/opt/python/cp36-cp36m
export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.7" ]]; then
python_dir=/opt/python/cp37-cp37m
export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.8" ]]; then
python_dir=/opt/python/cp38-cp38
export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.9" ]]; then
python_dir=/opt/python/cp39-cp39
export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.10" ]]; then
python_dir=/opt/python/cp310-cp310
export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.11" ]]; then
python_dir=/opt/python/cp311-cp311
export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.12" ]]; then
python_dir=/opt/python/cp312-cp312
export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.13" ]]; then
python_dir=/opt/python/cp313-cp313
export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
else
echo "Unsupported Python version $PYTHON_VERSION"
exit 1
fi
export PYTHON_INSTALL_DIR=$python_dir
export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
python3 --version
which python3
/var/www/scripts/github_actions/build-ubuntu-cpu.sh
- name: Display wheels
shell: bash
run: |
ls -lh ./wheelhouse/
# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
if: github.repository_owner == 'csukuangfj'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v2
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
cd huggingface
git pull
d=cpu/1.25.5.dev20250307/linux-x64
mkdir -p $d
cp -v ../wheelhouse/*.whl ./$d
git status
git lfs track "*.whl"
git add .
git commit -m "upload ubuntu-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main

View File

@@ -1,194 +0,0 @@
name: build-wheels-cuda-ubuntu
on:
push:
branches:
- wheel
# - torch-2.7.1
tags:
- '*'
workflow_dispatch:
concurrency:
group: build-wheels-cuda-ubuntu-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
# python ./scripts/github_actions/generate_build_matrix.py --enable-cuda
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --enable-cuda)
python ./scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
build-manylinux-wheels:
needs: generate_build_matrix
name: ${{ matrix.torch }} ${{ matrix.python-version }} cuda${{ matrix.cuda }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Free space
shell: bash
run: |
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
# see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
# https://github.com/pytorch/builder/tree/main/manywheel
# https://github.com/pytorch/builder/pull/476
# https://github.com/k2-fsa/k2/issues/733
# https://github.com/pytorch/pytorch/pull/50633 (generate build matrix)
- name: Run the build process with Docker
uses: addnab/docker-run-action@v3
with:
image: ${{ matrix.image }}
options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }} -e CUDA_VERSION=${{ matrix.cuda }}
run: |
echo "pwd: $PWD"
uname -a
id
cat /etc/*release
gcc --version
python3 --version
which python3
ls -lh /opt/python/
echo "---"
ls -lh /opt/python/cp*
ls -lh /opt/python/*/bin
echo "---"
find /opt/python/cp* -name "libpython*"
echo "-----"
find /opt/_internal/cp* -name "libpython*"
echo "-----"
find / -name "libpython*"
# cp36-cp36m
# cp37-cp37m
# cp38-cp38
# cp39-cp39
# cp310-cp310
# cp311-cp311
# cp312-cp312
# cp313-cp313
# cp313-cp313t (no gil)
if [[ $PYTHON_VERSION == "3.6" ]]; then
python_dir=/opt/python/cp36-cp36m
export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.7" ]]; then
python_dir=/opt/python/cp37-cp37m
export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.8" ]]; then
python_dir=/opt/python/cp38-cp38
export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.9" ]]; then
python_dir=/opt/python/cp39-cp39
export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.10" ]]; then
python_dir=/opt/python/cp310-cp310
export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.11" ]]; then
python_dir=/opt/python/cp311-cp311
export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.12" ]]; then
python_dir=/opt/python/cp312-cp312
export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
elif [[ $PYTHON_VERSION == "3.13" ]]; then
python_dir=/opt/python/cp313-cp313
export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
else
echo "Unsupported Python version $PYTHON_VERSION"
exit 1
fi
export PYTHON_INSTALL_DIR=$python_dir
export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
# There are no libpython.so inside $PYTHON_INSTALL_DIR
# since they are statically linked.
python3 --version
which python3
pushd /usr/local
rm cuda
ln -s cuda-$CUDA_VERSION cuda
popd
which nvcc
nvcc --version
cp /var/www/scripts/github_actions/install_torch.sh .
chmod +x install_torch.sh
/var/www/scripts/github_actions/build-ubuntu-cuda.sh
- name: Display wheels
shell: bash
run: |
ls -lh ./wheelhouse/
- name: Upload Wheel
if: false
uses: actions/upload-artifact@v4
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cuda-is_2_28-${{ matrix.is_2_28 }}
path: wheelhouse/*.whl
# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
if: github.repository_owner == 'csukuangfj'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v2
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
cd huggingface
git pull
d=cuda/1.25.5.dev20241029/linux
mkdir -p $d
cp -v ../wheelhouse/*.whl ./$d
git status
git lfs track "*.whl"
git add .
git commit -m "upload ubuntu-cuda wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main

View File

@@ -1,108 +0,0 @@
name: build-wheels-cpu-win64
on:
push:
branches:
# - wheel
- torch-2.8.0
tags:
- '*'
workflow_dispatch:
concurrency:
group: build-wheels-cpu-win64-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
# python ./scripts/github_actions/generate_build_matrix.py --for-windows
# MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows)
python ./scripts/github_actions/generate_build_matrix.py --for-windows --test-only-latest-torch
MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows --test-only-latest-torch)
echo "::set-output name=matrix::${MATRIX}"
build_wheels_win64_cpu:
needs: generate_build_matrix
name: ${{ matrix.torch }} ${{ matrix.python-version }}
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
shell: bash
run: |
pip install -q torch==${{ matrix.torch}} cmake numpy wheel>=0.40.0 twine setuptools
pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch_stable.html cmake numpy || pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch/ cmake numpy
- name: Build wheel
shell: bash
run: |
python3 setup.py bdist_wheel
mkdir wheelhouse
cp -v dist/* wheelhouse
- name: Display wheels
shell: bash
run: |
ls -lh ./wheelhouse/
- name: Upload Wheel
uses: actions/upload-artifact@v4
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-windows-latest-cpu
path: wheelhouse/*.whl
# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
if: github.repository_owner == 'csukuangfj'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v2
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
cd huggingface
git pull
d=cpu/1.25.5.dev20241029/windows
mkdir -p $d
cp -v ../wheelhouse/*.whl ./$d
git status
git lfs track "*.whl"
git add .
git commit -m "upload windows-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main

.gitignore (3 changed lines)
View File

@@ -5,6 +5,3 @@ dist/
__pycache__/
test-1hour.wav
path.sh
torch_version.py
cpu*.html
cuda*.html

View File

@@ -1,16 +1,10 @@
# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang)
if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
endif()
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(kaldifeat)
# remember to change the version in
# scripts/conda/kaldifeat/meta.yaml
# scripts/conda-cpu/kaldifeat/meta.yaml
set(kaldifeat_VERSION "1.25.5")
set(kaldifeat_VERSION "1.3")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
@@ -19,102 +13,32 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(BUILD_RPATH_USE_ORIGIN TRUE)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
if(NOT APPLE)
set(kaldifeat_rpath_origin "$ORIGIN")
else()
set(kaldifeat_rpath_origin "@loader_path")
endif()
set(CMAKE_INSTALL_RPATH ${kaldifeat_rpath_origin})
set(CMAKE_BUILD_RPATH ${kaldifeat_rpath_origin})
set(CMAKE_INSTALL_RPATH "$ORIGIN")
set(CMAKE_BUILD_RPATH "$ORIGIN")
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
set(CMAKE_BUILD_TYPE Release)
endif()
if (NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.")
endif()
message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
set(CMAKE_CXX_EXTENSIONS OFF)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
option(BUILD_SHARED_LIBS "Whether to build shared libraries" ON)
option(kaldifeat_BUILD_TESTS "Whether to build tests or not" OFF)
option(kaldifeat_BUILD_PYMODULE "Whether to build python module or not" ON)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
if(BUILD_SHARED_LIBS AND MSVC)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
option(BUILD_TESTS "Whether to build tests or not" ON)
if(kaldifeat_BUILD_PYMODULE)
include(pybind11)
endif()
# to prevent cmake from trying to link with system-installed mkl since we do not directly use it
# mkl libraries should be linked with pytorch already
# ref: https://github.com/pytorch/pytorch/blob/master/cmake/public/mkl.cmake
set(CMAKE_DISABLE_FIND_PACKAGE_MKL TRUE)
include(pybind11)
include(torch)
if(kaldifeat_BUILD_TESTS)
if(BUILD_TESTS)
include(googletest)
enable_testing()
endif()
if(WIN32)
# disable various warnings for MSVC
# 4624: destructor was implicitly defined as deleted because a base class destructor is inaccessible or deleted
set(disabled_warnings
/wd4624
)
message(STATUS "Disabled warnings: ${disabled_warnings}")
foreach(w IN LISTS disabled_warnings)
string(APPEND CMAKE_CXX_FLAGS " ${w} ")
endforeach()
endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
include_directories(${CMAKE_SOURCE_DIR})
add_subdirectory(kaldifeat)
# TORCH_VERSION is defined in cmake/torch.cmake
configure_file(
${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py.in
${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py @ONLY
)
configure_file(
${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfigVersion.cmake.in
${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake
@ONLY
)
configure_file(
${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfig.cmake.in
${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake
@ONLY
)
install(FILES
${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake
${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake
DESTINATION share/cmake/kaldifeat
)
install(FILES
${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py
DESTINATION ./
)

View File

@@ -1,6 +0,0 @@
include LICENSE
include README.md
include CMakeLists.txt
exclude pyproject.toml
recursive-include kaldifeat *.*
recursive-include cmake *.*

README.md (149 changed lines)
View File

@@ -1,33 +1,5 @@
# kaldifeat
<div align="center">
<img src="/doc/source/images/os-green.svg">
<img src="/doc/source/images/python_ge_3.6-blue.svg">
<img src="/doc/source/images/pytorch_ge_1.5.0-green.svg">
<img src="/doc/source/images/cuda_ge_10.1-orange.svg">
</div>
[![Documentation Status](https://github.com/csukuangfj/kaldifeat/actions/workflows/build-doc.yml/badge.svg)](https://csukuangfj.github.io/kaldifeat/)
**Documentation**: <https://csukuangfj.github.io/kaldifeat>
**Note**: If you are looking for a version that does not depend on PyTorch,
please see <https://github.com/csukuangfj/kaldi-native-fbank>
# Installation
Refer to
<https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html>
for installation.
> Never use `pip install kaldifeat`
> Never use `pip install kaldifeat`
> Never use `pip install kaldifeat`
<sub>
<table>
<tr>
@@ -37,36 +9,6 @@ for installation.
<th>Usage</th>
</tr>
<tr>
<td>Fbank for <a href="https://github.com/openai/whisper">Whisper</a></td>
<td><code>kaldifeat.WhisperFbankOptions</code></td>
<td><code>kaldifeat.WhisperFbank</code></td>
<td>
<pre lang="python">
opts = kaldifeat.WhisperFbankOptions()
opts.device = torch.device('cuda', 0)
fbank = kaldifeat.WhisperFbank(opts)
features = fbank(wave)
</pre>
See <a href="https://github.com/csukuangfj/kaldifeat/pull/82">#82</a>
</td>
</tr>
<tr>
<td>Fbank for <a href="https://github.com/openai/whisper">Whisper-V3</a></td>
<td><code>kaldifeat.WhisperFbankOptions</code></td>
<td><code>kaldifeat.WhisperFbank</code></td>
<td>
<pre lang="python">
opts = kaldifeat.WhisperFbankOptions()
opts.num_mels = 128
opts.device = torch.device('cuda', 0)
fbank = kaldifeat.WhisperFbank(opts)
features = fbank(wave)
</pre>
</td>
</tr>
<tr>
<td>FBANK</td>
<td><code>kaldifeat.FbankOptions</code></td>
@@ -82,17 +24,6 @@ features = fbank(wave)
</td>
</tr>
<tr>
<td>Streaming FBANK</td>
<td><code>kaldifeat.FbankOptions</code></td>
<td><code>kaldifeat.OnlineFbank</code></td>
<td>
See <a href="./kaldifeat/python/tests/test_fbank.py">
./kaldifeat/python/tests/test_fbank.py
</a>
</td>
</tr>
<tr>
<td>MFCC</td>
<td><code>kaldifeat.MfccOptions</code></td>
@@ -100,24 +31,13 @@ See <a href="./kaldifeat/python/tests/test_fbank.py">
<td>
<pre lang="python">
opts = kaldifeat.MfccOptions();
opts.num_ceps = 13
opts.numceps = 13
mfcc = kaldifeat.Mfcc(opts)
features = mfcc(wave)
</pre>
</td>
</tr>
<tr>
<td>Streaming MFCC</td>
<td><code>kaldifeat.MfccOptions</code></td>
<td><code>kaldifeat.OnlineMfcc</code></td>
<td>
See <a href="./kaldifeat/python/tests/test_mfcc.py">
./kaldifeat/python/tests/test_mfcc.py
</a>
</td>
</tr>
<tr>
<td>PLP</td>
<td><code>kaldifeat.PlpOptions</code></td>
@@ -132,17 +52,6 @@ features = plp(wave)
</td>
</tr>
<tr>
<td>Streaming PLP</td>
<td><code>kaldifeat.PlpOptions</code></td>
<td><code>kaldifeat.OnlinePlp</code></td>
<td>
See <a href="./kaldifeat/python/tests/test_plp.py">
./kaldifeat/python/tests/test_plp.py
</a>
</td>
</tr>
<tr>
<td>Spectrogram</td>
<td><code>kaldifeat.SpectrogramOptions</code></td>
@@ -172,8 +81,6 @@ The following kaldi-compatible commandline tools are implemented:
(**NOTE**: We will implement other types of features, e.g., Pitch, ivector, etc., soon.)
**HINT**: It also supports streaming feature extractors for Fbank, MFCC, and Plp.
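The streaming extractors are only referenced above through the test files; a minimal sketch of chunk-by-chunk usage follows. The method names (`accept_waveform`, `input_finished`, `num_frames_ready`, `get_frame`) are assumptions modeled on Kaldi's online-feature interface and are not confirmed by this diff; see `kaldifeat/python/tests/test_fbank.py` for the authoritative usage.

```python
# Hedged sketch of streaming (chunk) feature extraction with OnlineFbank.
# Method/property names below are assumed from a Kaldi-style online API;
# consult test_fbank.py for the real interface.
import torch
import kaldifeat

opts = kaldifeat.FbankOptions()
online_fbank = kaldifeat.OnlineFbank(opts)

sampling_rate = 16000
for chunk in torch.rand(5, 1600):                    # five 0.1-second chunks
    online_fbank.accept_waveform(sampling_rate, chunk)   # assumed method
online_fbank.input_finished()                        # assumed method

frames = [
    online_fbank.get_frame(i)                        # assumed method
    for i in range(online_fbank.num_frames_ready)    # assumed property
]
```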
# Usage
Let us first generate a test wave using sox:
@@ -285,12 +192,7 @@ Please refer to
- [kaldifeat/python/tests/test_mfcc.py](kaldifeat/python/tests/test_mfcc.py)
- [kaldifeat/python/tests/test_plp.py](kaldifeat/python/tests/test_plp.py)
- [kaldifeat/python/tests/test_spectrogram.py](kaldifeat/python/tests/test_spectrogram.py)
- [kaldifeat/python/tests/test_frame_extraction_options.py](kaldifeat/python/tests/test_frame_extraction_options.py)
- [kaldifeat/python/tests/test_mel_bank_options.py](kaldifeat/python/tests/test_mel_bank_options.py)
- [kaldifeat/python/tests/test_fbank_options.py](kaldifeat/python/tests/test_fbank_options.py)
- [kaldifeat/python/tests/test_mfcc_options.py](kaldifeat/python/tests/test_mfcc_options.py)
- [kaldifeat/python/tests/test_spectrogram_options.py](kaldifeat/python/tests/test_spectrogram_options.py)
- [kaldifeat/python/tests/test_plp_options.py](kaldifeat/python/tests/test_plp_options.py)
- [kaldifeat/python/tests/test_options.py](kaldifeat/python/tests/test_options.py)
for more examples.
@@ -299,31 +201,36 @@ for more examples.
- ``kaldifeat`` supports batch processing as well as chunk processing
- ``kaldifeat`` uses the same options as `Kaldi`'s `compute-fbank-feats` and `compute-mfcc-feats`
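As a concrete illustration of the batch-processing point above, `kaldifeat.Fbank` can be called once on several utterances; the list-of-tensors calling convention sketched below is an assumption rather than something shown in this diff.

```python
# Hedged sketch of batch processing: several waves of different lengths
# in a single call. The list-in / list-out convention is assumed.
import torch
import kaldifeat

opts = kaldifeat.FbankOptions()
fbank = kaldifeat.Fbank(opts)

waves = [torch.rand(16000), torch.rand(24000)]  # two fake utterances
features = fbank(waves)                         # assumed: returns a list
for f in features:
    print(f.shape)                              # (num_frames, num_mel_bins)
```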
# Usage in other projects
# Installation
## icefall
## From PyPi with pip
[icefall](https://github.com/k2-fsa/icefall) uses kaldifeat to extract features for a pre-trained model.
If you install `kaldifeat` using `pip`, it will also install
PyTorch 1.8.1. If this is not what you want (i.e., you have installed a
different version of PyTorch and you don't want to replace it
with PyTorch 1.8.1), please add an option `--no-dependencies` to
`pip install`.
See <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/conformer_ctc/pretrained.py>.
```bash
pip install kaldifeat # also installs torch 1.8.1
pip install --no-dependencies kaldifeat # will NOT install torch 1.8.1
```
## k2
## From source
[k2](https://github.com/k2-fsa/k2) uses kaldifeat's C++ API.
See <https://github.com/k2-fsa/k2/blob/v2.0-pre/k2/torch/csrc/features.cu>.
## lhotse
[lhotse](https://github.com/lhotse-speech/lhotse) uses kaldifeat to extract features on GPU.
See <https://github.com/lhotse-speech/lhotse/blob/master/lhotse/features/kaldifeat.py>.
## sherpa
[sherpa](https://github.com/k2-fsa/sherpa) uses kaldifeat for streaming feature
extraction.
See <https://github.com/k2-fsa/sherpa/blob/master/sherpa/bin/pruned_stateless_emformer_rnnt2/decode.py>
The following are the commands to compile `kaldifeat` from source.
We assume that you have installed `cmake` and PyTorch.
cmake 3.11 is known to work. Other cmake versions may also work.
PyTorch 1.8.1 is known to work. Other PyTorch versions may also work.
```bash
mkdir /some/path
git clone https://github.com/csukuangfj/kaldifeat.git
cd kaldifeat
python setup.py install
```
To test whether `kaldifeat` was installed successfully, you can run:
```bash
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
```

View File

@@ -2,34 +2,19 @@
import glob
import os
import platform
import shutil
import sys
from pathlib import Path
import setuptools
import torch
from setuptools.command.build_ext import build_ext
def get_pytorch_version():
# if it is 1.7.1+cuda101, then strip +cuda101
return torch.__version__.split("+")[0]
def is_for_pypi():
ans = os.environ.get("KALDIFEAT_IS_FOR_PYPI", None)
return ans is not None
def is_macos():
return platform.system() == "Darwin"
def is_windows():
return platform.system() == "Windows"
try:
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
@@ -37,14 +22,15 @@ try:
def finalize_options(self):
_bdist_wheel.finalize_options(self)
# In this case, the generated wheel has a name in the form
# kaldifeat-xxx-pyxx-none-any.whl
if is_for_pypi() and not is_macos():
# k2-xxx-pyxx-none-any.whl
if is_for_pypi():
self.root_is_pure = True
else:
# The generated wheel has a name ending with
# -linux_x86_64.whl
self.root_is_pure = False
except ImportError:
bdist_wheel = None
@@ -72,67 +58,33 @@ class BuildExtension(build_ext):
if cmake_args == "":
cmake_args = "-DCMAKE_BUILD_TYPE=Release"
extra_cmake_args = " -Dkaldifeat_BUILD_TESTS=OFF "
extra_cmake_args += f" -DCMAKE_INSTALL_PREFIX={Path(self.build_lib).resolve()}/kaldifeat " # noqa
major, minor = get_pytorch_version().split(".")[:2]
print("major, minor", major, minor)
major = int(major)
minor = int(minor)
if major > 2 or (major == 2 and minor >= 1):
extra_cmake_args += f" -DCMAKE_CXX_STANDARD=17 "
if make_args == "" and system_make_args == "":
print("For fast compilation, run:")
print('export KALDIFEAT_MAKE_ARGS="-j"; python setup.py install')
if "PYTHON_EXECUTABLE" not in cmake_args:
print(f"Setting PYTHON_EXECUTABLE to {sys.executable}")
cmake_args += f" -DPYTHON_EXECUTABLE={sys.executable}"
cmake_args += extra_cmake_args
build_cmd = f"""
cd {self.build_temp}
if is_windows():
build_cmd = f"""
cmake {cmake_args} -B {self.build_temp} -S {kaldifeat_dir}
cmake --build {self.build_temp} --target _kaldifeat --config Release -- -m
cmake --build {self.build_temp} --target install --config Release -- -m
"""
print(f"build command is:\n{build_cmd}")
ret = os.system(
f"cmake {cmake_args} -B {self.build_temp} -S {kaldifeat_dir}"
cmake {cmake_args} {kaldifeat_dir}
make {make_args} _kaldifeat
"""
print(f"build command is:\n{build_cmd}")
ret = os.system(build_cmd)
if ret != 0:
raise Exception(
"\nBuild kaldifeat failed. Please check the error message.\n"
"You can ask for help by creating an issue on GitHub.\n"
"\nClick:\n\thttps://github.com/csukuangfj/kaldifeat/issues/new\n" # noqa
)
if ret != 0:
raise Exception("Failed to configure kaldifeat")
ret = os.system(
f"cmake --build {self.build_temp} --target _kaldifeat --config Release -- -m"
)
if ret != 0:
raise Exception("Failed to build kaldifeat")
ret = os.system(
f"cmake --build {self.build_temp} --target install --config Release -- -m"
)
if ret != 0:
raise Exception("Failed to install kaldifeat")
else:
if make_args == "" and system_make_args == "":
print("For fast compilation, run:")
print('export KALDIFEAT_MAKE_ARGS="-j"; python setup.py install')
make_args = " -j4 "
print("Setting make_args to '-j4'")
build_cmd = f"""
cd {self.build_temp}
cmake {cmake_args} {kaldifeat_dir}
make {make_args} _kaldifeat install
"""
print(f"build command is:\n{build_cmd}")
ret = os.system(build_cmd)
if ret != 0:
raise Exception(
"\nBuild kaldifeat failed. Please check the error message.\n"
"You can ask for help by creating an issue on GitHub.\n"
"\nClick:\n\thttps://github.com/csukuangfj/kaldifeat/issues/new\n" # noqa
)
lib_so = glob.glob(f"{self.build_temp}/lib/*kaldifeat*.so")
for so in lib_so:
print(f"Copying {so} to {self.build_lib}/")
shutil.copy(f"{so}", f"{self.build_lib}/")

View File

@ -18,34 +18,14 @@ function(download_googltest)
# FetchContent is available since 3.11,
# we've copied it to ${CMAKE_SOURCE_DIR}/cmake/Modules
# so that it can be used in lower CMake versions.
message(STATUS "Use FetchContent provided by kaldifeat")
message(STATUS "Use FetchContent provided by k2")
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
endif()
include(FetchContent)
set(googletest_URL "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
set(googletest_URL2 "https://huggingface.co/csukuangfj/k2-cmake-deps/resolve/main/googletest-1.13.0.tar.gz")
set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363")
# If you don't have access to the Internet,
# please pre-download googletest
set(possible_file_locations
$ENV{HOME}/Downloads/googletest-1.13.0.tar.gz
${PROJECT_SOURCE_DIR}/googletest-1.13.0.tar.gz
${PROJECT_BINARY_DIR}/googletest-1.13.0.tar.gz
/tmp/googletest-1.13.0.tar.gz
/star-fj/fangjun/download/github/googletest-1.13.0.tar.gz
)
foreach(f IN LISTS possible_file_locations)
if(EXISTS ${f})
set(googletest_URL "${f}")
file(TO_CMAKE_PATH "${googletest_URL}" googletest_URL)
set(googletest_URL2)
break()
endif()
endforeach()
set(googletest_URL "https://github.com/google/googletest/archive/release-1.10.0.tar.gz")
set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb")
set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
@ -53,15 +33,13 @@ function(download_googltest)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_Declare(googletest
URL
${googletest_URL}
${googletest_URL2}
URL ${googletest_URL}
URL_HASH ${googletest_HASH}
)
FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
message(STATUS "Downloading googletest from ${googletest_URL}")
message(STATUS "Downloading googletest")
FetchContent_Populate(googletest)
endif()
message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")

View File

@ -1,65 +0,0 @@
# Findkaldifeat
# -------------
#
# Finds the kaldifeat library
#
# This will define the following variables:
#
# KALDIFEAT_FOUND -- True if the system has the kaldifeat library
# KALDIFEAT_INCLUDE_DIRS -- The include directories for kaldifeat
# KALDIFEAT_LIBRARIES -- Libraries to link against
# KALDIFEAT_CXX_FLAGS -- Additional (required) compiler flags
# KALDIFEAT_TORCH_VERSION_MAJOR -- The major version of PyTorch used to compile kaldifeat
# KALDIFEAT_TORCH_VERSION_MINOR -- The minor version of PyTorch used to compile kaldifeat
# KALDIFEAT_VERSION -- The version of kaldifeat
#
# and the following imported targets:
#
# kaldifeat_core
# This file is modified from pytorch/cmake/TorchConfig.cmake.in
set(KALDIFEAT_CXX_FLAGS "@CMAKE_CXX_FLAGS@")
set(KALDIFEAT_TORCH_VERSION_MAJOR @KALDIFEAT_TORCH_VERSION_MAJOR@)
set(KALDIFEAT_TORCH_VERSION_MINOR @KALDIFEAT_TORCH_VERSION_MINOR@)
set(KALDIFEAT_VERSION @kaldifeat_VERSION@)
if(DEFINED ENV{KALDIFEAT_INSTALL_PREFIX})
set(KALDIFEAT_INSTALL_PREFIX $ENV{KALDIFEAT_INSTALL_PREFIX})
else()
# Assume we are in <install-prefix>/share/cmake/kaldifeat/kaldifeatConfig.cmake
get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(KALDIFEAT_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
endif()
set(KALDIFEAT_INCLUDE_DIRS ${KALDIFEAT_INSTALL_PREFIX}/include)
set(KALDIFEAT_LIBRARIES kaldifeat_core)
foreach(lib IN LISTS KALDIFEAT_LIBRARIES)
find_library(location_${lib} ${lib}
PATHS
"${KALDIFEAT_INSTALL_PREFIX}/lib"
"${KALDIFEAT_INSTALL_PREFIX}/lib64"
)
if(NOT MSVC)
add_library(${lib} SHARED IMPORTED)
else()
add_library(${lib} STATIC IMPORTED)
endif()
set_target_properties(${lib} PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${KALDIFEAT_INCLUDE_DIRS}"
IMPORTED_LOCATION "${location_${lib}}"
CXX_STANDARD 14
)
set_property(TARGET ${lib} PROPERTY INTERFACE_COMPILE_OPTIONS @CMAKE_CXX_FLAGS@)
endforeach()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(kaldifeat DEFAULT_MSG
location_kaldifeat_core
)

View File

@ -1,12 +0,0 @@
# This file is modified from pytorch/cmake/TorchConfigVersion.cmake.in
set(PACKAGE_VERSION "@kaldifeat_VERSION@")
# Check whether the requested PACKAGE_FIND_VERSION is compatible
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
set(PACKAGE_VERSION_COMPATIBLE TRUE)
if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
set(PACKAGE_VERSION_EXACT TRUE)
endif()
endif()

View File

@ -8,39 +8,23 @@ function(download_pybind11)
include(FetchContent)
set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.tar.gz")
set(pybind11_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pybind11-2.12.0.tar.gz")
set(pybind11_HASH "SHA256=bf8f242abd1abcd375d516a7067490fb71abd79519a282d22b6e4d19282185a7")
# If you don't have access to the Internet,
# please pre-download pybind11
set(possible_file_locations
$ENV{HOME}/Downloads/pybind11-2.12.0.tar.gz
${CMAKE_SOURCE_DIR}/pybind11-2.12.0.tar.gz
${CMAKE_BINARY_DIR}/pybind11-2.12.0.tar.gz
/tmp/pybind11-2.12.0.tar.gz
/star-fj/fangjun/download/github/pybind11-2.12.0.tar.gz
)
foreach(f IN LISTS possible_file_locations)
if(EXISTS ${f})
set(pybind11_URL "${f}")
file(TO_CMAKE_PATH "${pybind11_URL}" pybind11_URL)
set(pybind11_URL2)
break()
endif()
endforeach()
set(pybind11_URL "https://github.com/pybind/pybind11/archive/v2.6.0.tar.gz")
set(pybind11_HASH "SHA256=90b705137b69ee3b5fc655eaca66d0dc9862ea1759226f7ccd3098425ae69571")
set(double_quotes "\"")
set(dollar "\$")
set(semicolon "\;")
FetchContent_Declare(pybind11
URL
${pybind11_URL}
${pybind11_URL2}
URL ${pybind11_URL}
URL_HASH ${pybind11_HASH}
PATCH_COMMAND
sed -i s/\\${double_quotes}-flto\\\\${dollar}/\\${double_quotes}-Xcompiler=-flto${dollar}/g "tools/pybind11Tools.cmake" &&
sed -i s/${semicolon}-fno-fat-lto-objects/${semicolon}-Xcompiler=-fno-fat-lto-objects/g "tools/pybind11Tools.cmake"
)
FetchContent_GetProperties(pybind11)
if(NOT pybind11_POPULATED)
message(STATUS "Downloading pybind11 from ${pybind11_URL}")
message(STATUS "Downloading pybind11")
FetchContent_Populate(pybind11)
endif()
message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}")

View File

@ -8,7 +8,6 @@ execute_process(
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TORCH_DIR
)
message(STATUS "TORCH_DIR: ${TORCH_DIR}")
list(APPEND CMAKE_PREFIX_PATH "${TORCH_DIR}")
find_package(Torch REQUIRED)
@ -25,14 +24,16 @@ execute_process(
message(STATUS "PyTorch version: ${TORCH_VERSION}")
execute_process(
COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__.split('.')[0])"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE KALDIFEAT_TORCH_VERSION_MAJOR
# Solve the following error for NVCC:
# unknown option `-Wall`
#
# It contains only some -Wno-* flags, so it is OK
# to set them to empty
set_property(TARGET torch_cuda
PROPERTY
INTERFACE_COMPILE_OPTIONS ""
)
execute_process(
COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__.split('.')[1])"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE KALDIFEAT_TORCH_VERSION_MINOR
set_property(TARGET torch_cpu
PROPERTY
INTERFACE_COMPILE_OPTIONS ""
)

View File

@ -1,6 +1,6 @@
dataclasses
recommonmark
sphinx<7.0
sphinx
sphinx-autodoc-typehints
sphinx_rtd_theme
sphinxcontrib-bibtex

doc/source/code/test_fbank.py Executable file
View File

@ -0,0 +1,72 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang)
import numpy as np
import soundfile as sf
import torch
import kaldifeat
def read_wave(filename) -> torch.Tensor:
"""Read a wave file and return it as a 1-D tensor.
Note:
You don't need to scale it to [-32768, 32767].
We use scaling here to follow the approach in Kaldi.
Args:
filename:
Filename of a sound file.
Returns:
Return a 1-D tensor containing audio samples.
"""
with sf.SoundFile(filename) as sf_desc:
sampling_rate = sf_desc.samplerate
assert sampling_rate == 16000
data = sf_desc.read(dtype=np.float32, always_2d=False)
data *= 32768
return torch.from_numpy(data)
def test_fbank():
device = torch.device("cpu")
if torch.cuda.is_available():
device = torch.device("cuda", 0)
wave0 = read_wave("test_data/test.wav")
wave1 = read_wave("test_data/test2.wav")
wave0 = wave0.to(device)
wave1 = wave1.to(device)
opts = kaldifeat.FbankOptions()
opts.frame_opts.dither = 0
opts.device = device
fbank = kaldifeat.Fbank(opts)
# We can compute fbank features in batches
features = fbank([wave0, wave1])
assert isinstance(features, list), f"{type(features)}"
assert len(features) == 2
# We can also compute fbank features for a single wave
features0 = fbank(wave0)
features1 = fbank(wave1)
assert torch.allclose(features[0], features0)
assert torch.allclose(features[1], features1)
# To compute fbank features for only a specified frame
audio_frames = fbank.convert_samples_to_frames(wave0)
feature_frame_1 = fbank.compute(audio_frames[1])
feature_frame_10 = fbank.compute(audio_frames[10])
assert torch.allclose(features0[1], feature_frame_1)
assert torch.allclose(features0[10], feature_frame_10)
if __name__ == "__main__":
test_fbank()

View File

@ -59,7 +59,7 @@ templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["images/*.md"]
exclude_patterns = []
source_suffix = {
".rst": "restructuredtext",
@ -102,35 +102,3 @@ html_theme_options = {
"prev_next_buttons_location": "bottom",
"style_external_links": True,
}
rst_epilog = """
.. _kaldifeat: https://github.com/csukuangfj/kaldifeat
.. _Kaldi: https://github.com/kaldi-asr/kaldi
.. _PyTorch: https://pytorch.org/
.. _kaldifeat.Fbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L10
.. _kaldifeat.Mfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L10
.. _kaldifeat.Plp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L10
.. _kaldifeat.Spectrogram: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/spectrogram.py#L9
.. _kaldifeat.OnlineFbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L16
.. _kaldifeat.OnlineMfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L16
.. _kaldifeat.OnlinePlp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L16
.. _compute-fbank-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-fbank-feats.cc
.. _compute-mfcc-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-mfcc-feats.cc
.. _compute-plp-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-plp-feats.cc
.. _compute-spectrogram-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-spectrogram-feats.cc
.. _kaldi::OnlineFbank: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L160
.. _kaldi::OnlineMfcc: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L158
.. _kaldi::OnlinePlp: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L159
.. _kaldifeat.FbankOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-fbank.h#L19
.. _kaldi::FbankOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.h#L41
.. _kaldifeat.MfccOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-mfcc.h#L22
.. _kaldi::MfccOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-mfcc.h#L38
.. _kaldifeat.PlpOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-plp.h#L24
.. _kaldi::PlpOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-plp.h#L42
.. _kaldifeat.SpectrogramOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-spectrogram.h#L18
.. _kaldi::SpectrogramOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-spectrogram.h#L38
.. _kaldifeat.FrameExtractionOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-window.h#L30
.. _kaldi::FrameExtractionOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.h#L35
.. _kaldifeat.MelBanksOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/mel-computations.h#L17
.. _kaldi::MelBanksOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/mel-computations.h#L43
"""

View File

@ -1,8 +0,0 @@
## File description
<https://shields.io/> is used to create the following files:
- ./os.svg
- ./python_ge_3.6-blue.svg
- ./cuda_ge_10.1-orange.svg
- ./pytorch_ge_1.5.0-green.svg

View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="94" height="20" role="img" aria-label="cuda: &gt;= 10.1"><title>cuda: &gt;= 10.1</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="94" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="37" height="20" fill="#555"/><rect x="37" width="57" height="20" fill="#fe7d37"/><rect width="94" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="195" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="270">cuda</text><text x="195" y="140" transform="scale(.1)" fill="#fff" textLength="270">cuda</text><text aria-hidden="true" x="645" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">&gt;= 10.1</text><text x="645" y="140" transform="scale(.1)" fill="#fff" textLength="470">&gt;= 10.1</text></g></svg>


View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="176" height="20" role="img" aria-label="os: Linux | macOS | Windows"><title>os: Linux | macOS | Windows</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="176" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="153" height="20" fill="#97ca00"/><rect width="176" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">os</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">os</text><text aria-hidden="true" x="985" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="1430">Linux | macOS | Windows</text><text x="985" y="140" transform="scale(.1)" fill="#fff" textLength="1430">Linux | macOS | Windows</text></g></svg>


View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: &gt;= 3.6"><title>python: &gt;= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">&gt;= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">&gt;= 3.6</text></g></svg>


View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="112" height="20" role="img" aria-label="pytorch: &gt;= 1.5.0"><title>pytorch: &gt;= 1.5.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="112" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="51" height="20" fill="#555"/><rect x="51" width="61" height="20" fill="#97ca00"/><rect width="112" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="265" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="410">pytorch</text><text x="265" y="140" transform="scale(.1)" fill="#fff" textLength="410">pytorch</text><text aria-hidden="true" x="805" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">&gt;= 1.5.0</text><text x="805" y="140" transform="scale(.1)" fill="#fff" textLength="510">&gt;= 1.5.0</text></g></svg>


View File

@ -6,11 +6,19 @@
kaldifeat
=========
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ implements
feature extraction algorithms **compatible** with kaldi using PyTorch, supporting CUDA
as well as autograd.
Currently, only fbank features are supported.
It can produce the same feature output as ``compute-fbank-feats`` (from kaldi)
when given the same options.
.. toctree::
:maxdepth: 2
:caption: Contents
:caption: Contents:
intro
installation/index
usage/index
installation
usage

View File

@ -0,0 +1,54 @@
Installation
============
.. _from source:
Install kaldifeat from source
-----------------------------
You have to install ``cmake`` and ``PyTorch`` first.
- ``cmake`` 3.11 is known to work. Other CMake versions may also work.
- ``PyTorch`` 1.8.1 is known to work. Other PyTorch versions may also work.
- Python >= 3.6
The commands to install ``kaldifeat`` from source are:
.. code-block:: bash
git clone https://github.com/csukuangfj/kaldifeat
cd kaldifeat
python3 setup.py install
To test that you have installed ``kaldifeat`` successfully, please run:
.. code-block:: bash
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
It should print the version, e.g., ``1.0``.
Install kaldifeat from PyPI
---------------------------
The pre-built ``kaldifeat`` hosted on PyPI uses PyTorch 1.8.1.
If you install ``kaldifeat`` using pip, it will replace your locally
installed PyTorch automatically with PyTorch 1.8.1.
If you don't want this to happen, please `Install kaldifeat from source`_.
The command to install ``kaldifeat`` from PyPI is:
.. code-block:: bash
pip install kaldifeat
To test that you have installed ``kaldifeat`` successfully, please run:
.. code-block:: bash
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
It should print the version, e.g., ``1.0``.

View File

@ -1,48 +0,0 @@
FAQs
====
How to install a CUDA version of kaldifeat from source
------------------------------------------------------
You need to first install a CUDA version of `PyTorch`_ and then install `kaldifeat`_.
.. note::
You can use a CUDA version of `kaldifeat`_ on machines with no GPUs.
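For instance, the following minimal sketch (using only the ``device`` field described
in the usage section) keeps computation on the CPU even with a CUDA build of `kaldifeat`_:

.. code-block:: python

    import kaldifeat

    opts = kaldifeat.FbankOptions()
    opts.device = "cpu"  # a CUDA build still runs on machines without GPUs
    fbank = kaldifeat.Fbank(opts)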
How to install a CPU version of kaldifeat from source
-----------------------------------------------------
You need to first install a CPU version of `PyTorch`_ and then install `kaldifeat`_.
How to fix `Caffe2: Cannot find cuDNN library`
----------------------------------------------
.. code-block::
Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN
libraries. Please set the proper cuDNN prefixes and / or install cuDNN.
You will have such an error when you want to install a CUDA version of `kaldifeat`_
by ``pip install kaldifeat`` or from source.
You need to first install cuDNN. Assume you have installed cuDNN to the
path ``/path/to/cudnn``. You can fix the error by using ``one`` of the following
commands.
(1) Fix for installation using ``pip install``
.. code-block:: bash
export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
pip install --verbose kaldifeat
(2) Fix for installation from source
.. code-block:: bash
mkdir /some/path
cd /some/path
git clone https://github.com/csukuangfj/kaldifeat.git
cd kaldifeat
export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
python setup.py install

View File

@ -1,47 +0,0 @@
.. _from source:
Install kaldifeat from source
=============================
You have to install ``cmake`` and `PyTorch`_ first.
- ``cmake`` 3.11 is known to work. Other CMake versions may also work.
- `PyTorch`_ >= 1.5.0 is known to work. Other PyTorch versions may also work.
- Python >= 3.6
- A compiler that supports C++ 14
The commands to install `kaldifeat`_ from source are:
.. code-block:: bash
git clone https://github.com/csukuangfj/kaldifeat
cd kaldifeat
python3 setup.py install
To test that you have installed `kaldifeat`_ successfully, please run:
.. code-block:: bash
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
It should print the version, e.g., ``1.0``.
.. _from PyPI:
Install kaldifeat from PyPI
---------------------------
The command to install `kaldifeat`_ from PyPI is:
.. code-block:: bash
pip install --verbose kaldifeat
To test that you have installed `kaldifeat`_ successfully, please run:
.. code-block:: bash
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
It should print the version, e.g., ``1.0``.

View File

@ -1,139 +0,0 @@
From pre-compiled wheels (Recommended)
=======================================
You can find pre-compiled wheels at
- CPU wheels: `<https://csukuangfj.github.io/kaldifeat/cpu.html>`_
- CUDA wheels: `<https://csukuangfj.github.io/kaldifeat/cuda.html>`_
We give a few examples below to show you how to install `kaldifeat`_ from
pre-compiled wheels.
.. hint::
The following lists only some examples. We suggest that you always select the
latest version of ``kaldifeat``.
Linux (CPU)
-----------
Suppose you want to install the following wheel:
.. code-block:: bash
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
you can use one of the following methods:
.. code-block:: bash
# method 1
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
# For users from China
# If you cannot access huggingface, please use:
# pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
# method 2
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# For users from China
# If you cannot access huggingface, please use:
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Windows (CPU)
--------------
Suppose you want to install the following wheel:
.. code-block:: bash
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
you can use one of the following methods:
.. code-block:: bash
# method 1
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
# For users from China
# If you cannot access huggingface, please use:
# pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
# method 2
pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
# For users from China
# If you cannot access huggingface, please use:
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
macOS (CPU)
-----------
Suppose you want to install the following wheel:
.. code-block:: bash
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
you can use one of the following methods:
.. code-block:: bash
# method 1
pip install torch==2.4.0
pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
# For users from China
# If you cannot access huggingface, please use:
# pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
# method 2
pip install torch==2.4.0 -f https://download.pytorch.org/whl/torch/
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
# For users from China
# If you cannot access huggingface, please use:
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
Linux (CUDA)
------------
Suppose you want to install the following wheel:
.. code-block:: bash
https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
you can use one of the following methods:
.. code-block:: bash
# method 1
pip install torch==2.4.0+cu124 -f https://download.pytorch.org/whl/torch/
pip install kaldifeat==1.25.4.dev20240725+cuda12.4.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cuda.html
# For users from China
# If you cannot access huggingface, please use:
# pip install kaldifeat==1.25.4.dev20240725+cuda12.4.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cuda-cn.html
# method 2
pip install torch==2.4.0+cu124 -f https://download.pytorch.org/whl/torch/
wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# For users from China
# If you cannot access huggingface, please use:
# wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install ./kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

View File

@ -1,11 +0,0 @@
Installation
============
.. toctree::
:maxdepth: 3
./from_wheels.rst
./from_source.rst
./faq.rst

View File

@ -1,103 +0,0 @@
Introduction
============
`kaldifeat`_ implements
speech feature extraction algorithms **compatible** with `Kaldi`_ using `PyTorch`_,
supporting CUDA as well as autograd.
`kaldifeat`_ has the following features:
- Fully compatible with `Kaldi`_
.. note::
The underlying C++ code is copied & modified from `Kaldi`_ directly.
It is rewritten with `PyTorch` C++ APIs.
- Provide not only ``C++ APIs`` but also ``Python APIs``
.. note::
You can access `kaldifeat`_ from ``Python``.
- Support autograd
- Support ``CUDA`` and ``CPU``
.. note::
You can use CUDA for feature extraction.
- Support ``online`` (i.e., ``streaming``) and ``offline`` (i.e., ``non-streaming``)
feature extraction
- Support chunk-based processing
.. note::
This is especially useful if you want to process audio that is several
hours long, which may cause OOM if you send it for computation all at once.
With chunk-based processing, you can process audio of arbitrary length.
- Support batch processing
.. note::
With `kaldifeat`_ you can extract features for a batch of audios
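For example, batch extraction is just a call with a list of waveforms. The sketch below
follows the test code shipped with `kaldifeat`_; the random tensors merely stand in for
real 16 kHz audio samples scaled to the int16 range.

.. code-block:: python

    import torch
    import kaldifeat

    opts = kaldifeat.FbankOptions()
    opts.frame_opts.dither = 0

    fbank = kaldifeat.Fbank(opts)

    # two waveforms of different lengths (1-D float tensors, 16 kHz)
    wave0 = torch.rand(16000) * 32768
    wave1 = torch.rand(8000) * 32768

    # passing a list computes features for the whole batch;
    # the result is a list of 2-D tensors, one per waveform
    features = fbank([wave0, wave1])
    assert len(features) == 2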
.. see https://sublime-and-sphinx-guide.readthedocs.io/en/latest/tables.html
Currently implemented speech features and their counterparts in `Kaldi`_ are
listed in the following table.
.. list-table:: Supported speech features
:widths: 50 50
:header-rows: 1
* - Supported speech features
- Counterpart in `Kaldi`_
* - `kaldifeat.Fbank`_
- `compute-fbank-feats`_
* - `kaldifeat.Mfcc`_
- `compute-mfcc-feats`_
* - `kaldifeat.Plp`_
- `compute-plp-feats`_
* - `kaldifeat.Spectrogram`_
- `compute-spectrogram-feats`_
* - `kaldifeat.OnlineFbank`_
- `kaldi::OnlineFbank`_
* - `kaldifeat.OnlineMfcc`_
- `kaldi::OnlineMfcc`_
* - `kaldifeat.OnlinePlp`_
- `kaldi::OnlinePlp`_
Each feature computer needs an option. The following table lists the options
for each computer and the corresponding options in `Kaldi`_.
.. hint::
Note that we reuse the parameter names from `Kaldi`_.
Also, both online feature computers and offline feature computers share the
same option type; see the sketch after the table below.
.. list-table:: Feature computer options
:widths: 50 50
:header-rows: 1
* - Options in `kaldifeat`_
- Corresponding options in `Kaldi`_
* - `kaldifeat.FbankOptions`_
- `kaldi::FbankOptions`_
* - `kaldifeat.MfccOptions`_
- `kaldi::MfccOptions`_
* - `kaldifeat.PlpOptions`_
- `kaldi::PlpOptions`_
* - `kaldifeat.SpectrogramOptions`_
- `kaldi::SpectrogramOptions`_
* - `kaldifeat.FrameExtractionOptions`_
- `kaldi::FrameExtractionOptions`_
* - `kaldifeat.MelBanksOptions`_
- `kaldi::MelBanksOptions`_
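As noted in the hint above, a single options object drives both the offline and the
online computer. A minimal sketch that only constructs the two computers (no audio is
fed here):

.. code-block:: python

    import kaldifeat

    opts = kaldifeat.FbankOptions()
    opts.mel_opts.num_bins = 80

    offline_fbank = kaldifeat.Fbank(opts)       # offline (non-streaming)
    online_fbank = kaldifeat.OnlineFbank(opts)  # online (streaming)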
Read more to learn how to install `kaldifeat`_ and how to use each feature
computer.

doc/source/usage.rst Normal file
View File

@ -0,0 +1,212 @@
Usage
=====
Let us first see the help message of kaldi's ``compute-fbank-feats``:
.. code-block:: bash
$ compute-fbank-feats
Create Mel-filter bank (FBANK) feature files.
Usage: compute-fbank-feats [options...] <wav-rspecifier> <feats-wspecifier>
Options:
--allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
--allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
--blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
--channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
--debug-mel : Print out debugging information for mel bin computation (bool, default = false)
--dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
--energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
--frame-length : Frame length in milliseconds (float, default = 25)
--frame-shift : Frame shift in milliseconds (float, default = 10)
--high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
--htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
--low-freq : Low cutoff frequency for mel bins (float, default = 20)
--max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
--min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
--num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
--output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
--preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
--raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
--remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
--round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
--sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
--snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
--subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
--use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
--use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
--use-power : If true, use power, else use magnitude. (bool, default = true)
--utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
--vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
--vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
--vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
--vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
--window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
--write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
Standard options:
--config : Configuration file to read (this option may be repeated) (string, default = "")
--help : Print out usage message (bool, default = false)
--print-args : Print the command line arguments (to stderr) (bool, default = true)
--verbose : Verbose level (higher->more logging) (int, default = 0)
FbankOptions
------------
``kaldifeat`` reuses the same options from kaldi's ``compute-fbank-feats``.
The following shows the default values of ``kaldifeat.FbankOptions``:
.. code-block:: python
>>> import kaldifeat
>>> fbank_opts = kaldifeat.FbankOptions()
>>> print(fbank_opts)
frame_opts:
samp_freq: 16000
frame_shift_ms: 10
frame_length_ms: 25
dither: 1
preemph_coeff: 0.97
remove_dc_offset: 1
window_type: povey
round_to_power_of_two: 1
blackman_coeff: 0.42
snip_edges: 1
mel_opts:
num_bins: 23
low_freq: 20
high_freq: 0
vtln_low: 100
vtln_high: -500
debug_mel: 0
htk_mode: 0
use_energy: 0
energy_floor: 0
raw_energy: 1
htk_compat: 0
use_log_fbank: 1
use_power: 1
device: cpu
It consists of three parts:
- ``frame_opts``
Options in this part are accessed by ``frame_opts.xxx``. That is, to access
the sample rate, you use:
.. code-block:: python
>>> fbank_opts = kaldifeat.FbankOptions()
>>> print(fbank_opts.frame_opts.samp_freq)
16000.0
- ``mel_opts``
Options in this part are accessed by ``mel_opts.xxx``. That is, to access
the number of mel bins, you use:
.. code-block:: python
>>> fbank_opts = kaldifeat.FbankOptions()
>>> print(fbank_opts.mel_opts.num_bins)
23
- fbank related
Options in this part are accessed directly. That is, to access the device
field, you use:
.. code-block:: python
>>> print(fbank_opts.device)
cpu
>>> fbank_opts.device = 'cuda:0'
>>> print(fbank_opts.device)
cuda:0
>>> import torch
>>> fbank_opts.device = torch.device('cuda', 0)
>>> print(fbank_opts.device)
cuda:0
To change the sample rate to 8000, you can use:
.. code-block:: python
>>> fbank_opts = kaldifeat.FbankOptions()
>>> print(fbank_opts.frame_opts.samp_freq)
16000.0
>>> fbank_opts.frame_opts.samp_freq = 8000
>>> print(fbank_opts.frame_opts.samp_freq)
8000.0
To change ``snip_edges`` to ``False``, you can use:
.. code-block:: python
>>> fbank_opts.frame_opts.snip_edges = False
>>> print(fbank_opts.frame_opts.snip_edges)
False
To change the number of mel bins to 80, you can use:
.. code-block:: python
>>> print(fbank_opts.mel_opts.num_bins)
23
>>> fbank_opts.mel_opts.num_bins = 80
>>> print(fbank_opts.mel_opts.num_bins)
80
To change the device to ``cuda``, you can use:
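.. code-block:: python

    >>> fbank_opts = kaldifeat.FbankOptions()
    >>> fbank_opts.device = 'cuda:0'
    >>> print(fbank_opts.device)
    cuda:0

This mirrors the ``device`` example shown above; it assumes you have installed a
CUDA-enabled build of PyTorch.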
Fbank
-----
The following shows how to use ``kaldifeat.Fbank`` to compute
the fbank features of sound files.
First, let us generate two sound files using ``sox``:
.. code-block:: bash
# generate a 1.2-second wav containing a sine wave
# swept from 300 Hz to 3300 Hz
sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
# another sound file, 0.5 seconds long
sox -n -r 16000 -b 16 test2.wav synth 0.5 sine 300-3300
.. hint::
You can find the above two files by visiting the following two links:
- `test.wav <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_data/test.wav>`_
- `test2.wav <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_data/test2.wav>`_
The `following code <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank.py>`_
shows the usage of ``kaldifeat.Fbank``.
It shows:
- How to read a sound file. Note that audio samples are scaled to the range [-32768, 32767].
The intention is to produce the same output as Kaldi. You don't need to scale the samples
if you don't care about compatibility with Kaldi.
- ``kaldifeat.Fbank`` supports CUDA as well as CPU
- ``kaldifeat.Fbank`` supports processing sound files in a batch as well as accepting
a single sound file
.. literalinclude:: ./code/test_fbank.py
:caption: Demo of ``kaldifeat.Fbank``
:language: python

View File

@ -1,46 +0,0 @@
compute-fbank-feats
Create Mel-filter bank (FBANK) feature files.
Usage: compute-fbank-feats [options...] <wav-rspecifier> <feats-wspecifier>
Options:
--allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
--allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
--blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
--channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
--debug-mel : Print out debugging information for mel bin computation (bool, default = false)
--dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
--energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
--frame-length : Frame length in milliseconds (float, default = 25)
--frame-shift : Frame shift in milliseconds (float, default = 10)
--high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
--htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
--low-freq : Low cutoff frequency for mel bins (float, default = 20)
--max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
--min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
--num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
--output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
--preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
--raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
--remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
--round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
--sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
--snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
--subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
--use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
--use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
--use-power : If true, use power, else use magnitude. (bool, default = true)
--utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
--vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
--vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
--vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
--vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
--window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
--write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
Standard options:
--config : Configuration file to read (this option may be repeated) (string, default = "")
--help : Print out usage message (bool, default = false)
--print-args : Print the command line arguments (to stderr) (bool, default = true)
--verbose : Verbose level (higher->more logging) (int, default = 0)

View File

@ -1,65 +0,0 @@
$ python3
Python 3.8.0 (default, Oct 28 2019, 16:14:01)
[GCC 8.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import kaldifeat
>>> opts = kaldifeat.FbankOptions()
>>> print(opts)
frame_opts:
samp_freq: 16000
frame_shift_ms: 10
frame_length_ms: 25
dither: 1
preemph_coeff: 0.97
remove_dc_offset: 1
window_type: povey
round_to_power_of_two: 1
blackman_coeff: 0.42
snip_edges: 1
max_feature_vectors: -1
mel_opts:
num_bins: 23
low_freq: 20
high_freq: 0
vtln_low: 100
vtln_high: -500
debug_mel: 0
htk_mode: 0
use_energy: 0
energy_floor: 0
raw_energy: 1
htk_compat: 0
use_log_fbank: 1
use_power: 1
device: cpu
>>> print(opts.dither)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: '_kaldifeat.FbankOptions' object has no attribute 'dither'
>>>
>>> print(opts.frame_opts.dither)
1.0
>>> opts.frame_opts.dither = 0 # disable dither
>>> print(opts.frame_opts.dither)
0.0
>>> import torch
>>> print(opts.device)
cpu
>>> opts.device = 'cuda:0'
>>> print(opts.device)
cuda:0
>>> opts.device = torch.device('cuda', 1)
>>> print(opts.device)
cuda:1
>>> opts.device = 'cpu'
>>> print(opts.device)
cpu
>>> print(opts.mel_opts.num_bins)
23
>>> opts.mel_opts.num_bins = 80
>>> print(opts.mel_opts.num_bins)
80

View File

@ -1 +0,0 @@
../../../../kaldifeat/python/tests/test_fbank_options.py

View File

@ -1,3 +0,0 @@
kaldifeat.Fbank
===============

View File

@ -1,51 +0,0 @@
kaldifeat.FbankOptions
======================
If you want to construct an instance of `kaldifeat.Fbank`_ or
`kaldifeat.OnlineFbank`_, you have to provide an instance of
`kaldifeat.FbankOptions`_.
The following code shows how to construct an instance of `kaldifeat.FbankOptions`_.
.. literalinclude:: ./code/fbank_options-1.txt
:caption: Usage of `kaldifeat.FbankOptions`_
:emphasize-lines: 6,8,22,37
Note that we reuse the same option names as `compute-fbank-feats`_ from `Kaldi`_:
.. code-block:: bash
$ compute-fbank-feats --help
.. literalinclude:: ./code/compute-fbank-feats-help.txt
:caption: Output of ``compute-fbank-feats --help``
Please refer to the output of ``compute-fbank-feats --help`` for the meaning
of each field of `kaldifeat.FbankOptions`_.
One thing worth noting is that `kaldifeat.FbankOptions`_ has a field ``device``,
which is an instance of ``torch.device``. You can assign it either a string, e.g.,
``"cpu"`` or ``"cuda:0"``, or an instance of ``torch.device``, e.g., ``torch.device("cpu")`` or
``torch.device("cuda", 1)``.
.. hint::
You can use this field to control whether the feature computer
constructed from it performs computation on CPU or CUDA.
.. caution::
If you use a CUDA device, make sure that you have installed a CUDA version
of `PyTorch`_.
Example usage
-------------
The following code from
`<https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank_options.py>`_
demonstrates the usage of `kaldifeat.FbankOptions`_:
.. literalinclude:: ./code/test_fbank_options.py
:caption: Example usage of `kaldifeat.FbankOptions`_
:language: python

View File

@ -1,11 +0,0 @@
Usage
=====
This section describes how to use feature computers in `kaldifeat`_.
.. toctree::
:maxdepth: 2
fbank_options
fbank
online_fbank

View File

@ -1,3 +0,0 @@
kaldifeat.OnlineFbank
=====================

View File

@ -1,106 +0,0 @@
#!/usr/bin/env python3
import datetime
import os
import platform
import re
import shutil
import torch
def is_macos():
return platform.system() == "Darwin"
def is_windows():
return platform.system() == "Windows"
def with_cuda():
if shutil.which("nvcc") is None:
return False
if is_macos():
return False
return True
def get_pytorch_version():
# if it is 1.7.1+cuda101, then strip +cuda101
return torch.__version__.split("+")[0]
def get_cuda_version():
from torch.utils import collect_env
running_cuda_version = collect_env.get_running_cuda_version(collect_env.run)
cuda_version = torch.version.cuda
if running_cuda_version is not None and cuda_version is not None:
assert cuda_version in running_cuda_version, (
f"PyTorch is built with CUDA version: {cuda_version}.\n"
f"The current running CUDA version is: {running_cuda_version}"
)
return cuda_version
def is_for_pypi():
ans = os.environ.get("KALDIFEAT_IS_FOR_PYPI", None)
return ans is not None
def is_stable():
ans = os.environ.get("KALDIFEAT_IS_STABLE", None)
return ans is not None
def is_for_conda():
ans = os.environ.get("KALDIFEAT_IS_FOR_CONDA", None)
return ans is not None
def get_package_version():
# Set a default CUDA version here so that `pip install kaldifeat`
# uses the default CUDA version.
#
default_cuda_version = "10.1" # CUDA 10.1
if with_cuda():
cuda_version = get_cuda_version()
if is_for_pypi() and default_cuda_version == cuda_version:
cuda_version = ""
pytorch_version = ""
local_version = ""
else:
cuda_version = f"+cuda{cuda_version}"
pytorch_version = get_pytorch_version()
local_version = f"{cuda_version}.torch{pytorch_version}"
else:
pytorch_version = get_pytorch_version()
local_version = f"+cpu.torch{pytorch_version}"
if is_for_conda():
local_version = ""
if is_for_pypi() and is_macos():
local_version = ""
with open("CMakeLists.txt") as f:
content = f.read()
latest_version = re.search(r"set\(kaldifeat_VERSION (.*)\)", content).group(
1
)
latest_version = latest_version.strip('"')
if not is_stable():
dt = datetime.datetime.utcnow()
package_version = f"{latest_version}.dev{dt.year}{dt.month:02d}{dt.day:02d}{local_version}"
else:
package_version = f"{latest_version}"
return package_version
if __name__ == "__main__":
print(get_package_version())

View File

@ -1,4 +1,2 @@
add_subdirectory(csrc)
if(kaldifeat_BUILD_PYMODULE)
add_subdirectory(python)
endif()
add_subdirectory(python)

View File

@ -9,26 +9,11 @@ set(kaldifeat_srcs
feature-window.cc
matrix-functions.cc
mel-computations.cc
online-feature.cc
whisper-fbank.cc
)
add_library(kaldifeat_core ${kaldifeat_srcs})
add_library(kaldifeat_core SHARED ${kaldifeat_srcs})
target_link_libraries(kaldifeat_core PUBLIC ${TORCH_LIBRARIES})
target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MAJOR=${KALDIFEAT_TORCH_VERSION_MAJOR})
target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MINOR=${KALDIFEAT_TORCH_VERSION_MINOR})
if(APPLE)
execute_process(
COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
)
message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
target_link_libraries(kaldifeat_core PUBLIC "-L ${PYTHON_SITE_PACKAGE_DIR}/../..")
endif()
add_executable(test_kaldifeat test_kaldifeat.cc)
target_link_libraries(test_kaldifeat PRIVATE kaldifeat_core)
@ -42,52 +27,19 @@ function(kaldifeat_add_test source)
gtest_main
)
# NOTE: We set the working directory here so that
# it works also on windows. The reason is that
# the required DLLs are inside ${TORCH_DIR}/lib
# and they can be found by the exe if the current
# working directory is ${TORCH_DIR}\lib
add_test(NAME "Test.${name}"
COMMAND
$<TARGET_FILE:${name}>
WORKING_DIRECTORY ${TORCH_DIR}/lib
)
endfunction()
if(kaldifeat_BUILD_TESTS)
if(BUILD_TESTS)
# please sort the source files alphabetically
set(test_srcs
feature-window-test.cc
online-feature-test.cc
)
foreach(source IN LISTS test_srcs)
kaldifeat_add_test(${source})
endforeach()
endif()
file(MAKE_DIRECTORY
DESTINATION
${PROJECT_BINARY_DIR}/include/kaldifeat/csrc
)
file(GLOB_RECURSE all_headers *.h)
message(STATUS "All headers: ${all_headers}")
file(COPY
${all_headers}
DESTINATION
${PROJECT_BINARY_DIR}/include/kaldifeat/csrc
)
if(BUILD_SHARED_LIBS AND WIN32)
install(TARGETS kaldifeat_core
DESTINATION ../
)
endif()
install(TARGETS kaldifeat_core
DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(FILES ${all_headers}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/kaldifeat/csrc
)

View File

@ -1 +0,0 @@
exclude_files=whisper-mel-bank.h,whisper-v3-mel-bank.h

View File

@ -55,17 +55,10 @@ torch::Tensor OfflineFeatureTpl<F>::ComputeFeatures(const torch::Tensor &wave,
int32_t padding = frame_opts.PaddedWindowSize() - strided_input.size(1);
if (padding > 0) {
#ifdef __ANDROID__
auto padding_value = torch::zeros(
{strided_input.size(0), padding},
torch::dtype(torch::kFloat).device(strided_input.device()));
strided_input = torch::cat({strided_input, padding_value}, 1);
#else
strided_input = torch::nn::functional::pad(
strided_input, torch::nn::functional::PadFuncOptions({0, padding})
.mode(torch::kConstant)
.value(0));
#endif
}
return computer_.Compute(log_energy_pre_window, vtln_warp, strided_input);

View File

@ -7,23 +7,7 @@
#ifndef KALDIFEAT_CSRC_FEATURE_COMMON_H_
#define KALDIFEAT_CSRC_FEATURE_COMMON_H_
#include "kaldifeat/csrc/feature-functions.h"
#include "kaldifeat/csrc/feature-window.h"
// See "The torch.fft module in PyTorch 1.7"
// https://github.com/pytorch/pytorch/wiki/The-torch.fft-module-in-PyTorch-1.7
#if KALDIFEAT_TORCH_VERSION_MAJOR > 1 || \
(KALDIFEAT_TORCH_VERSION_MAJOR == 1 && KALDIFEAT_TORCH_VERSION_MINOR > 6)
#include "torch/fft.h"
#define KALDIFEAT_HAS_FFT_NAMESPACE
// It uses torch::fft::rfft
// Its input shape is [x, N], output shape is [x, N/2]
// which is a complex tensor
#else
#include "ATen/Functions.h"
// It uses torch::fft
// Its input shape is [x, N], output shape is [x, N/2, 2]
// which contains the real part [..., ], and imaginary part [..., 1]
#endif
namespace kaldifeat {
@ -62,10 +46,6 @@ class OfflineFeatureTpl {
int32_t Dim() const { return computer_.Dim(); }
const Options &GetOptions() const { return computer_.GetOptions(); }
const FrameExtractionOptions &GetFrameOptions() const {
return GetOptions().frame_opts;
}
// Copy constructor.
OfflineFeatureTpl(const OfflineFeatureTpl<F> &) = delete;
OfflineFeatureTpl<F> &operator=(const OfflineFeatureTpl<F> &) = delete;

View File

@ -8,6 +8,9 @@
#include <cmath>
#include "torch/fft.h"
#include "torch/torch.h"
namespace kaldifeat {
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
@ -63,20 +66,7 @@ torch::Tensor FbankComputer::Compute(torch::Tensor signal_raw_log_energy,
}
// note spectrum is in magnitude, not power, because of `abs()`
#if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
// signal_frame shape: [x, 512]
// spectrum shape [x, 257]
torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
#else
// signal_frame shape [x, 512]
// real_imag shape [x, 257, 2],
// where [..., 0] is the real part
// [..., 1] is the imaginary part
torch::Tensor real_imag = torch::rfft(signal_frame, 1);
torch::Tensor real = real_imag.index({"...", 0});
torch::Tensor imag = real_imag.index({"...", 1});
torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
#endif
// remove the last column, i.e., the highest fft bin
spectrum = spectrum.index(

View File

@ -13,6 +13,7 @@
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/mel-computations.h"
#include "torch/torch.h"
namespace kaldifeat {
@ -44,18 +45,20 @@ struct FbankOptions {
std::string ToString() const {
std::ostringstream os;
os << "FbankOptions(";
os << "frame_opts: \n";
os << frame_opts << "\n";
os << "\n";
os << "frame_opts=" << frame_opts.ToString() << ", ";
os << "mel_opts=" << mel_opts.ToString() << ", ";
os << "mel_opts: \n";
os << mel_opts << "\n";
os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
os << "energy_floor=" << energy_floor << ", ";
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
os << "use_log_fbank=" << (use_log_fbank ? "True" : "False") << ", ";
os << "use_power=" << (use_power ? "True" : "False") << ", ";
os << "device=\"" << device << "\")";
os << "use_energy: " << use_energy << "\n";
os << "energy_floor: " << energy_floor << "\n";
os << "raw_energy: " << raw_energy << "\n";
os << "htk_compat: " << htk_compat << "\n";
os << "use_log_fbank: " << use_log_fbank << "\n";
os << "use_power: " << use_power << "\n";
os << "device: " << device << "\n";
return os.str();
}
};

View File

@ -7,7 +7,7 @@
#ifndef KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_
#define KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_
#include "torch/script.h"
#include "torch/torch.h"
namespace kaldifeat {

View File

@ -91,20 +91,7 @@ torch::Tensor MfccComputer::Compute(torch::Tensor signal_raw_log_energy,
}
// note spectrum is in magnitude, not power, because of `abs()`
#if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
// signal_frame shape: [x, 512]
// spectrum shape [x, 257]
torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
#else
// signal_frame shape [x, 512]
// real_imag shape [x, 257, 2],
// where [..., 0] is the real part
// [..., 1] is the imaginary part
torch::Tensor real_imag = torch::rfft(signal_frame, 1);
torch::Tensor real = real_imag.index({"...", 0});
torch::Tensor imag = real_imag.index({"...", 1});
torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
#endif
// remove the last column, i.e., the highest fft bin
spectrum = spectrum.index(

View File

@ -13,7 +13,7 @@
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/mel-computations.h"
#include "torch/script.h"
#include "torch/torch.h"
namespace kaldifeat {
@ -53,18 +53,20 @@ struct MfccOptions {
std::string ToString() const {
std::ostringstream os;
os << "MfccOptions(";
os << "frame_opts=" << frame_opts.ToString() << ", ";
os << "mel_opts=" << mel_opts.ToString() << ", ";
os << "frame_opts: \n";
os << frame_opts << "\n";
os << "\n";
os << "num_ceps=" << num_ceps << ", ";
os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
os << "energy_floor=" << energy_floor << ", ";
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
os << "cepstral_lifter=" << cepstral_lifter << ", ";
os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
os << "device=\"" << device << "\")";
os << "mel_opts: \n";
os << mel_opts << "\n";
os << "num_ceps: " << num_ceps << "\n";
os << "use_energy: " << use_energy << "\n";
os << "energy_floor: " << energy_floor << "\n";
os << "raw_energy: " << raw_energy << "\n";
os << "cepstral_lifter: " << cepstral_lifter << "\n";
os << "htk_compat: " << htk_compat << "\n";
os << "device: " << device << "\n";
return os.str();
}
};

View File

@ -7,6 +7,7 @@
#include "kaldifeat/csrc/feature-plp.h"
#include "kaldifeat/csrc/feature-functions.h"
#include "torch/torch.h"
namespace kaldifeat {
@ -96,20 +97,7 @@ torch::Tensor PlpComputer::Compute(torch::Tensor signal_raw_log_energy,
}
// note spectrum is in magnitude, not power, because of `abs()`
#if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
// signal_frame shape: [x, 512]
// spectrum shape [x, 257]
torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
#else
// signal_frame shape [x, 512]
// real_imag shape [x, 257, 2],
// where [..., 0] is the real part
// [..., 1] is the imaginary part
torch::Tensor real_imag = torch::rfft(signal_frame, 1);
torch::Tensor real = real_imag.index({"...", 0});
torch::Tensor imag = real_imag.index({"...", 1});
torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
#endif
// remove the last column, i.e., the highest fft bin
spectrum = spectrum.index(

View File

@ -13,7 +13,7 @@
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/mel-computations.h"
#include "torch/script.h"
#include "torch/torch.h"
namespace kaldifeat {
@ -61,21 +61,23 @@ struct PlpOptions {
std::string ToString() const {
std::ostringstream os;
os << "PlpOptions(";
os << "frame_opts: \n";
os << frame_opts << "\n";
os << "\n";
os << "frame_opts=" << frame_opts.ToString() << ", ";
os << "mel_opts=" << mel_opts.ToString() << ", ";
os << "mel_opts: \n";
os << mel_opts << "\n";
os << "lpc_order=" << lpc_order << ", ";
os << "num_ceps=" << num_ceps << ", ";
os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
os << "energy_floor=" << energy_floor << ", ";
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
os << "compress_factor=" << compress_factor << ", ";
os << "cepstral_lifter=" << cepstral_lifter << ", ";
os << "cepstral_scale=" << cepstral_scale << ", ";
os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
os << "device=\"" << device << "\")";
os << "lpc_order: " << lpc_order << "\n";
os << "num_ceps: " << num_ceps << "\n";
os << "use_energy: " << use_energy << "\n";
os << "energy_floor: " << energy_floor << "\n";
os << "raw_energy: " << raw_energy << "\n";
os << "compress_factor: " << compress_factor << "\n";
os << "cepstral_lifter: " << cepstral_lifter << "\n";
os << "cepstral_scale: " << cepstral_scale << "\n";
os << "htk_compat: " << htk_compat << "\n";
os << "device: " << device << "\n";
return os.str();
}
};

View File

@ -36,21 +36,7 @@ torch::Tensor SpectrogramComputer::Compute(torch::Tensor signal_raw_log_energy,
}
// note spectrum is in magnitude, not power, because of `abs()`
#if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
// signal_frame shape: [x, 512]
// spectrum shape [x, 257]
torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
#else
// signal_frame shape [x, 512]
// real_imag shape [x, 257, 2],
// where [..., 0] is the real part
// [..., 1] is the imaginary part
torch::Tensor real_imag = torch::rfft(signal_frame, 1);
torch::Tensor real = real_imag.index({"...", 0});
torch::Tensor imag = real_imag.index({"...", 1});
torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
#endif
if (opts_.return_raw_fft) {
KALDIFEAT_ERR << "return raw fft is not supported yet";
}

View File

@ -11,7 +11,7 @@
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
#include "torch/script.h"
#include "torch/torch.h"
namespace kaldifeat {
@ -36,12 +36,13 @@ struct SpectrogramOptions {
std::string ToString() const {
std::ostringstream os;
os << "SpectrogramOptions(";
os << "frame_opts=" << frame_opts.ToString() << ", ";
os << "energy_floor=" << energy_floor << ", ";
os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
os << "return_raw_fft=" << (return_raw_fft ? "True" : "False") << ", ";
os << "device=\"" << device << "\")";
os << "frame_opts: \n";
os << frame_opts << "\n";
os << "energy_floor: " << energy_floor << "\n";
os << "raw_energy: " << raw_energy << "\n";
// os << "return_raw_fft: " << return_raw_fft << "\n";
os << "device: " << device << "\n";
return os.str();
}
};

View File

@ -9,6 +9,8 @@
#include <cmath>
#include <vector>
#include "torch/torch.h"
#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif
@ -29,13 +31,6 @@ FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts,
float *window_data = window.data_ptr<float>();
double a = M_2PI / (frame_length - 1);
if (opts.window_type == "hann") {
// see https://pytorch.org/docs/stable/generated/torch.hann_window.html
// We assume periodic is true
a = M_2PI / frame_length;
}
for (int32_t i = 0; i < frame_length; i++) {
double i_fl = static_cast<double>(i);
if (opts.window_type == "hanning") {
@ -46,8 +41,6 @@ FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts,
window_data[i] = sin(0.5 * a * i_fl);
} else if (opts.window_type == "hamming") {
window_data[i] = 0.54 - 0.46 * cos(a * i_fl);
} else if (opts.window_type == "hann") {
window_data[i] = 0.50 - 0.50 * cos(a * i_fl);
} else if (opts.window_type ==
"povey") { // like hamming but goes to zero at edges.
window_data[i] = pow(0.5 - 0.5 * cos(a * i_fl), 0.85);
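For reference (a sketch, not taken from the source), the "povey" branch above can be isolated into a standalone helper; PoveyWindowSample is a hypothetical name and M_PI is assumed to be available from <cmath>.

// Sketch only: one sample of the Povey window (Hamming-like, but zero at the edges).
#include <cmath>
#include <cstdint>

double PoveyWindowSample(int32_t i, int32_t frame_length) {
  const double a = 2.0 * M_PI / (frame_length - 1);  // same "a" as in the loop above
  return std::pow(0.5 - 0.5 * std::cos(a * i), 0.85);
}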
@ -164,26 +157,25 @@ torch::Tensor GetStrided(const torch::Tensor &wave,
}
torch::Tensor Dither(const torch::Tensor &wave, float dither_value) {
if (dither_value == 0.0f) return wave;
if (dither_value == 0.0f) return wave;
torch::Tensor rand_gauss = torch::randn_like(wave);
#if 1
return wave + rand_gauss * dither_value;
#else
// use in-place version of wave and change it to pointer type
// use in-place version of wave and change it to a pointer type
wave_->add_(rand_gauss, dither_value);
#endif
}
torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave) {
using namespace torch::indexing; // It imports: Slice, None // NOLINT
if (preemph_coeff == 0.0f) return wave;
KALDIFEAT_ASSERT(preemph_coeff >= 0.0f && preemph_coeff <= 1.0f);
torch::Tensor ans = torch::empty_like(wave);
using torch::indexing::None;
using torch::indexing::Slice;
// right = wave[:, 1:]
torch::Tensor right = wave.index({"...", Slice(1, None, None)});
@ -198,59 +190,4 @@ torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave) {
return ans;
}
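As a scalar reference (a sketch, not from the source), the tensor-sliced code above computes y[n] = x[n] - coeff * x[n-1] per frame; PreemphasizeRef is a hypothetical helper, and the first-sample convention y[0] = x[0] - coeff * x[0] is an assumption that follows the usual Kaldi treatment.

// Sketch only: per-sample reference for pre-emphasis.
#include <vector>

std::vector<float> PreemphasizeRef(float coeff, const std::vector<float> &x) {
  if (coeff == 0.0f || x.empty()) return x;
  std::vector<float> y(x.size());
  y[0] = x[0] - coeff * x[0];  // assumed first-sample convention
  for (size_t n = 1; n < x.size(); ++n) y[n] = x[n] - coeff * x[n - 1];
  return y;
}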
torch::Tensor ExtractWindow(int64_t sample_offset, const torch::Tensor &wave,
int32_t f, const FrameExtractionOptions &opts) {
KALDIFEAT_ASSERT(sample_offset >= 0 && wave.numel() != 0);
int32_t frame_length = opts.WindowSize();
int64_t num_samples = sample_offset + wave.numel();
int64_t start_sample = FirstSampleOfFrame(f, opts);
int64_t end_sample = start_sample + frame_length;
if (opts.snip_edges) {
KALDIFEAT_ASSERT(start_sample >= sample_offset &&
end_sample <= num_samples);
} else {
KALDIFEAT_ASSERT(sample_offset == 0 || start_sample >= sample_offset);
}
// wave_start and wave_end are start and end indexes into 'wave', for the
// piece of wave that we're trying to extract.
int32_t wave_start = static_cast<int32_t>(start_sample - sample_offset);
int32_t wave_end = wave_start + frame_length;
if (wave_start >= 0 && wave_end <= wave.numel()) {
// the normal case -- no edge effects to consider.
// return wave[wave_start:wave_end]
return wave.index({torch::indexing::Slice(wave_start, wave_end)});
} else {
torch::Tensor window = torch::empty({frame_length}, torch::kFloat);
auto p_window = window.accessor<float, 1>();
auto p_wave = wave.accessor<float, 1>();
// Deal with any end effects by reflection, if needed. This code will only
// be reached for about two frames per utterance, so we don't concern
// ourselves excessively with efficiency.
int32_t wave_dim = wave.numel();
for (int32_t s = 0; s != frame_length; ++s) {
int32_t s_in_wave = s + wave_start;
while (s_in_wave < 0 || s_in_wave >= wave_dim) {
// reflect around the beginning or end of the wave.
// e.g. -1 -> 0, -2 -> 1.
// dim -> dim - 1, dim + 1 -> dim - 2.
// the code supports repeated reflections, although this
// would only be needed in pathological cases.
if (s_in_wave < 0) {
s_in_wave = -s_in_wave - 1;
} else {
s_in_wave = 2 * wave_dim - 1 - s_in_wave;
}
}
p_window[s] = p_wave[s_in_wave];
}
return window;
}
}
} // namespace kaldifeat
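The reflection loop is the only subtle part of ExtractWindow(); as a sketch (not from the source), here is the same rule as a standalone helper with a worked example in the comments; ReflectIndex is a hypothetical name.

// Sketch only: reflect an out-of-range sample index back into [0, wave_dim).
#include <cstdint>

int32_t ReflectIndex(int32_t s_in_wave, int32_t wave_dim) {
  while (s_in_wave < 0 || s_in_wave >= wave_dim) {
    if (s_in_wave < 0) {
      s_in_wave = -s_in_wave - 1;                // -1 -> 0, -2 -> 1
    } else {
      s_in_wave = 2 * wave_dim - 1 - s_in_wave;  // wave_dim -> wave_dim - 1
    }
  }
  return s_in_wave;  // e.g. with wave_dim = 5: -2 -> 1, 6 -> 3
}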

View File

@ -7,8 +7,7 @@
#include <string>
#include "kaldifeat/csrc/log.h"
#include "torch/all.h"
#include "torch/script.h"
#include "torch/torch.h"
#ifndef KALDIFEAT_CSRC_FEATURE_WINDOW_H_
#define KALDIFEAT_CSRC_FEATURE_WINDOW_H_
@ -44,11 +43,7 @@ struct FrameExtractionOptions {
bool snip_edges = true;
// bool allow_downsample = false;
// bool allow_upsample = false;
// Used for streaming feature extraction. It indicates the number
// of feature frames to keep in the recycling vector. -1 means to
// keep all feature frames.
int32_t max_feature_vectors = -1;
// int32_t max_feature_vectors = -1;
int32_t WindowShift() const {
return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
@ -62,20 +57,21 @@ struct FrameExtractionOptions {
}
std::string ToString() const {
std::ostringstream os;
os << "FrameExtractionOptions(";
os << "samp_freq=" << samp_freq << ", ";
os << "frame_shift_ms=" << frame_shift_ms << ", ";
os << "frame_length_ms=" << frame_length_ms << ", ";
os << "dither=" << dither << ", ";
os << "preemph_coeff=" << preemph_coeff << ", ";
os << "remove_dc_offset=" << (remove_dc_offset ? "True" : "False") << ", ";
os << "window_type=" << '"' << window_type << '"' << ", ";
os << "round_to_power_of_two=" << (round_to_power_of_two ? "True" : "False")
<< ", ";
os << "blackman_coeff=" << blackman_coeff << ", ";
os << "snip_edges=" << (snip_edges ? "True" : "False") << ", ";
os << "max_feature_vectors=" << max_feature_vectors << ")";
#define KALDIFEAT_PRINT(x) os << #x << ": " << x << "\n"
KALDIFEAT_PRINT(samp_freq);
KALDIFEAT_PRINT(frame_shift_ms);
KALDIFEAT_PRINT(frame_length_ms);
KALDIFEAT_PRINT(dither);
KALDIFEAT_PRINT(preemph_coeff);
KALDIFEAT_PRINT(remove_dc_offset);
KALDIFEAT_PRINT(window_type);
KALDIFEAT_PRINT(round_to_power_of_two);
KALDIFEAT_PRINT(blackman_coeff);
KALDIFEAT_PRINT(snip_edges);
// KALDIFEAT_PRINT(allow_downsample);
// KALDIFEAT_PRINT(allow_upsample);
// KALDIFEAT_PRINT(max_feature_vectors);
#undef KALDIFEAT_PRINT
return os.str();
}
};
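For readers unfamiliar with the stringizing operator, KALDIFEAT_PRINT turns each field into its own "name: value" line; as an illustration (not part of the source):

// KALDIFEAT_PRINT(samp_freq) expands to:
//   os << "samp_freq" << ": " << samp_freq << "\n";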
@ -103,11 +99,11 @@ class FeatureWindowFunction {
@param [in] flush True if we are asserting that this number of samples
is 'all there is', false if we are expecting more data to possibly come in. This
only makes a difference to the answer
if opts.snip_edges == false. For offline feature extraction you always want
flush == true. In an online-decoding context, once you know (or decide) that
no more data is coming in, you'd call it with flush == true at the end to
flush out any remaining data.
only makes a difference to the answer if opts.snip_edges
== false. For offline feature extraction you always want flush ==
true. In an online-decoding context, once you know (or decide)
that no more data is coming in, you'd call it with flush == true at the end
to flush out any remaining data.
*/
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
bool flush = true);
@ -136,29 +132,6 @@ torch::Tensor Dither(const torch::Tensor &wave, float dither_value);
torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave);
/*
ExtractWindow() extracts "frame_length" samples from the given waveform.
Note: This function only extracts "frame_length" samples
from the input waveform, without any further processing.
@param [in] sample_offset If 'wave' is not the entire waveform, but
part of it to the left has been discarded, then the
number of samples prior to 'wave' that we have
already discarded. Set this to zero if you are
processing the entire waveform in one piece, or
if you get 'no matching function' compilation
errors when updating the code.
@param [in] wave The waveform
@param [in] f The frame index to be extracted, with
0 <= f < NumFrames(sample_offset + wave.numel(), opts, true)
@param [in] opts The options class to be used
@return Return a tensor containing "frame_length" samples extracted from
`wave`, without any further processing. Its shape is
(1, frame_length).
*/
torch::Tensor ExtractWindow(int64_t sample_offset, const torch::Tensor &wave,
int32_t f, const FrameExtractionOptions &opts);
} // namespace kaldifeat
#endif // KALDIFEAT_CSRC_FEATURE_WINDOW_H_
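A hedged usage sketch of the declarations in this header (ForEachFrame is a hypothetical name; the exact shape returned by ExtractWindow may differ from the doc comment above):

// Sketch only: iterate over all frames of a 1-D float waveform held in memory.
#include "kaldifeat/csrc/feature-window.h"

void ForEachFrame(const torch::Tensor &wave,
                  const kaldifeat::FrameExtractionOptions &opts) {
  int32_t n = kaldifeat::NumFrames(wave.numel(), opts, /*flush=*/true);
  for (int32_t f = 0; f != n; ++f) {
    torch::Tensor frame =
        kaldifeat::ExtractWindow(/*sample_offset=*/0, wave, f, opts);
    // frame holds opts.WindowSize() samples; windowing and FFT would follow here.
  }
}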

View File

@ -1,39 +0,0 @@
#!/usr/bin/env python3
# Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
import librosa
import numpy as np
def main():
m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=128)
assert m.shape == (128, 201)
s = "// Auto-generated. Do NOT edit!\n\n"
s += "// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)\n\n"
s += "\n"
s += "#ifndef KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n"
s += "#define KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n"
s += "namespace kaldifeat {\n\n"
s += f"constexpr int32_t kWhisperV3MelRows = {m.shape[0]};\n"
s += f"constexpr int32_t kWhisperV3MelCols = {m.shape[1]};\n"
s += "\n"
s += "constexpr float kWhisperV3MelArray[] = {\n"
sep = ""
for i, f in enumerate(m.reshape(-1).tolist()):
s += f"{sep}{f:.8f}"
sep = ", "
if i and i % 7 == 0:
s += ",\n"
sep = ""
s += "};\n\n"
s += "} // namespace kaldifeat\n\n"
s += "#endif // KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n"
with open("whisper-v3-mel-bank.h", "w") as f:
f.write(s)
if __name__ == "__main__":
main()
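For orientation, the header written by this script has roughly the following shape (weights elided; this excerpt is illustrative, not the generated file):

// Auto-generated. Do NOT edit!
#ifndef KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_
#define KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_
namespace kaldifeat {

constexpr int32_t kWhisperV3MelRows = 128;
constexpr int32_t kWhisperV3MelCols = 201;

constexpr float kWhisperV3MelArray[] = {
    // 128 * 201 librosa mel filter-bank weights, printed 8 per line
};

}  // namespace kaldifeat
#endif  // KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_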

View File

@ -1,39 +0,0 @@
#!/usr/bin/env python3
# Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
import librosa
import numpy as np
def main():
m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=80)
assert m.shape == (80, 201)
s = "// Auto-generated. Do NOT edit!\n\n"
s += "// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)\n\n"
s += "\n"
s += "#ifndef KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n"
s += "#define KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n"
s += "namespace kaldifeat {\n\n"
s += f"constexpr int32_t kWhisperMelRows = {m.shape[0]};\n"
s += f"constexpr int32_t kWhisperMelCols = {m.shape[1]};\n"
s += "\n"
s += "constexpr float kWhisperMelArray[] = {\n"
sep = ""
for i, f in enumerate(m.reshape(-1).tolist()):
s += f"{sep}{f:.8f}"
sep = ", "
if i and i % 7 == 0:
s += ",\n"
sep = ""
s += "};\n\n"
s += "} // namespace kaldifeat\n\n"
s += "#endif // KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n"
with open("whisper-mel-bank.h", "w") as f:
f.write(s)
if __name__ == "__main__":
main()

View File

@ -5,7 +5,6 @@
#ifndef KALDIFEAT_CSRC_LOG_H_
#define KALDIFEAT_CSRC_LOG_H_
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <sstream>

View File

@ -7,7 +7,7 @@
#ifndef KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_
#define KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_
#include "torch/script.h"
#include "torch/torch.h"
namespace kaldifeat {

View File

@ -138,7 +138,7 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
<< " and vtln-high " << vtln_high << ", versus "
<< "low-freq " << low_freq << " and high-freq " << high_freq;
// we will transpose bins_mat_ at the end of this function
// we will transpose bins_mat_ at the end of this function
bins_mat_ = torch::zeros({num_bins, num_fft_bins}, torch::kFloat);
int32_t stride = bins_mat_.strides()[0];
@ -179,14 +179,12 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
last_index = i;
}
}
// Note: It is possible that first_index == last_index == -1 at this line.
KALDIFEAT_ASSERT(first_index != -1 && last_index >= first_index &&
"You may have set num_mel_bins too large.");
// Replicate a bug in HTK, for testing purposes.
if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f &&
first_index != -1) {
if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f)
this_bin[first_index] = 0.0f;
}
}
if (debug_) KALDIFEAT_LOG << bins_mat_;
@ -198,15 +196,6 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
}
}
MelBanks::MelBanks(const float *weights, int32_t num_rows, int32_t num_cols,
torch::Device device)
: debug_(false), htk_mode_(false) {
bins_mat_ = torch::from_blob(const_cast<float *>(weights),
{num_rows, num_cols}, torch::kFloat)
.t()
.to(device);
}
torch::Tensor MelBanks::Compute(const torch::Tensor &spectrum) const {
return torch::mm(spectrum, bins_mat_);
}
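As a shape note (a sketch, not part of the source; ApplyMelBanks is a hypothetical name), Compute() is a single matrix product precisely because bins_mat_ is stored transposed:

// Sketch only: shape bookkeeping behind MelBanks::Compute().
#include "torch/torch.h"

torch::Tensor ApplyMelBanks(const torch::Tensor &spectrum,    // [num_frames, num_fft_bins]
                            const torch::Tensor &bins_mat) {  // [num_fft_bins, num_bins]
  return torch::mm(spectrum, bins_mat);                       // [num_frames, num_bins]
}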

View File

@ -36,14 +36,13 @@ struct MelBanksOptions {
std::string ToString() const {
std::ostringstream os;
os << "MelBanksOptions(";
os << "num_bins=" << num_bins << ", ";
os << "low_freq=" << low_freq << ", ";
os << "high_freq=" << high_freq << ", ";
os << "vtln_low=" << vtln_low << ", ";
os << "vtln_high=" << vtln_high << ", ";
os << "debug_mel=" << (debug_mel ? "True" : "False") << ", ";
os << "htk_mode=" << (htk_mode ? "True" : "False") << ")";
os << "num_bins: " << num_bins << "\n";
os << "low_freq: " << low_freq << "\n";
os << "high_freq: " << high_freq << "\n";
os << "vtln_low: " << vtln_low << "\n";
os << "vtln_high: " << vtln_high << "\n";
os << "debug_mel: " << debug_mel << "\n";
os << "htk_mode: " << htk_mode << "\n";
return os.str();
}
};
@ -76,17 +75,6 @@ class MelBanks {
const FrameExtractionOptions &frame_opts, float vtln_warp_factor,
torch::Device device);
// Initialize with a 2-d weights matrix
//
// Note: This constructor is for Whisper. It does not initialize
// center_freqs_.
//
// @param weights Pointer to the start address of the matrix
// @param num_rows It equals to number of mel bins
// @param num_cols It equals to (number of fft bins)/2+1
MelBanks(const float *weights, int32_t num_rows, int32_t num_cols,
torch::Device device);
// CAUTION: we save a transposed version of bins_mat_, so return size(1) here
int32_t NumBins() const { return static_cast<int32_t>(bins_mat_.size(1)); }
@ -100,8 +88,7 @@ class MelBanks {
private:
// A 2-D matrix. Its shape is NOT [num_bins, num_fft_bins]
// Its shape is [num_fft_bins, num_bins] for non-whisper.
// For whisper, its shape is [num_fft_bins/2+1, num_bins]
// Its shape is [num_fft_bins, num_bins].
torch::Tensor bins_mat_;
// center frequencies of bins, numbered from 0 ... num_bins-1.

View File

@ -1,89 +0,0 @@
// kaldifeat/csrc/online-feature-itf.h
//
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
// This file is copied/modified from kaldi/src/itf/online-feature-itf.h
#ifndef KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
#define KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
#include <utility>
#include <vector>
#include "torch/script.h"
namespace kaldifeat {
class OnlineFeatureInterface {
public:
virtual ~OnlineFeatureInterface() = default;
virtual int32_t Dim() const = 0; /// returns the feature dimension.
//
// Returns frame shift in seconds. Helps to estimate duration from frame
// counts.
virtual float FrameShiftInSeconds() const = 0;
/// Returns the total number of frames, since the start of the utterance, that
/// are now available. In an online-decoding context, this will likely
/// increase with time as more data becomes available.
virtual int32_t NumFramesReady() const = 0;
/// Returns true if this is the last frame. Frame indices are zero-based, so
/// the first frame is zero. IsLastFrame(-1) will return false, unless the
/// file is empty (which is a case that I'm not sure all the code will handle,
/// so be careful). This function may return false for some frame if we
/// haven't yet decided to terminate decoding, but later true if we decide to
/// terminate decoding. This function exists mainly to correctly handle end
/// effects in feature extraction, and is not a mechanism to determine how
/// many frames are in the decodable object (as it used to be, and for
/// backward compatibility, still is, in the Decodable interface).
virtual bool IsLastFrame(int32_t frame) const = 0;
/// Gets the feature vector for this frame. Before calling this for a given
/// frame, it is assumed that you called NumFramesReady() and it returned a
/// number greater than "frame". Otherwise this call will likely crash with
/// an assert failure. This function is not declared const, in case there is
/// some kind of caching going on, but most of the time it shouldn't modify
/// the class.
///
/// The returned tensor has shape (1, Dim()).
virtual torch::Tensor GetFrame(int32_t frame) = 0;
/// This is like GetFrame() but for a collection of frames. There is a
/// default implementation that just gets the frames one by one, but it
/// may be overridden for efficiency by child classes (since sometimes
/// it's more efficient to do things in a batch).
///
/// The returned tensor has shape (frames.size(), Dim()).
virtual std::vector<torch::Tensor> GetFrames(
const std::vector<int32_t> &frames) {
std::vector<torch::Tensor> features;
features.reserve(frames.size());
for (auto i : frames) {
torch::Tensor f = GetFrame(i);
features.push_back(std::move(f));
}
return features;
#if 0
return torch::cat(features, /*dim*/ 0);
#endif
}
/// This would be called from the application, when you get more wave data.
/// Note: the sampling_rate is typically only provided so the code can assert
/// that it matches the sampling rate expected in the options.
virtual void AcceptWaveform(float sampling_rate,
const torch::Tensor &waveform) = 0;
/// InputFinished() tells the class you won't be providing any
/// more waveform. This will help flush out the last few frames
/// of delta or LDA features (it will typically affect the return value
/// of IsLastFrame).
virtual void InputFinished() = 0;
};
} // namespace kaldifeat
#endif // KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
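A hedged sketch of how a caller typically drives this interface in a streaming loop (Consume and next_frame are hypothetical names, not part of kaldifeat):

// Sketch only: feed audio chunks and consume frames as they become ready.
void Consume(kaldifeat::OnlineFeatureInterface *feat, const torch::Tensor &chunk,
             float sample_rate, bool last_chunk, int32_t *next_frame) {
  feat->AcceptWaveform(sample_rate, chunk);
  if (last_chunk) feat->InputFinished();  // flush the remaining frames
  while (*next_frame < feat->NumFramesReady()) {
    torch::Tensor f = feat->GetFrame((*next_frame)++);  // shape (1, Dim())
    // ... hand f to the decoder / neural network ...
  }
}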

View File

@ -1,49 +0,0 @@
// kaldifeat/csrc/online-feature-test.h
//
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
#include "kaldifeat/csrc/online-feature.h"
#include "gtest/gtest.h"
namespace kaldifeat {
TEST(RecyclingVector, TestUnlimited) {
RecyclingVector v(-1);
constexpr int32_t N = 100;
for (int32_t i = 0; i != N; ++i) {
torch::Tensor t = torch::tensor({i, i + 1, i + 2});
v.PushBack(t);
}
ASSERT_EQ(v.Size(), N);
for (int32_t i = 0; i != N; ++i) {
torch::Tensor t = v.At(i);
torch::Tensor expected = torch::tensor({i, i + 1, i + 2});
EXPECT_TRUE(t.equal(expected));
}
}
TEST(RecyclingVector, Testlimited) {
constexpr int32_t K = 3;
constexpr int32_t N = 10;
RecyclingVector v(K);
for (int32_t i = 0; i != N; ++i) {
torch::Tensor t = torch::tensor({i, i + 1, i + 2});
v.PushBack(t);
}
ASSERT_EQ(v.Size(), N);
for (int32_t i = 0; i < N - K; ++i) {
ASSERT_DEATH(v.At(i), "");
}
for (int32_t i = N - K; i != N; ++i) {
torch::Tensor t = v.At(i);
torch::Tensor expected = torch::tensor({i, i + 1, i + 2});
EXPECT_TRUE(t.equal(expected));
}
}
} // namespace kaldifeat

View File

@ -1,133 +0,0 @@
// kaldifeat/csrc/online-feature.cc
//
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
// This file is copied/modified from kaldi/src/feat/online-feature.cc
#include "kaldifeat/csrc/online-feature.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/log.h"
namespace kaldifeat {
RecyclingVector::RecyclingVector(int32_t items_to_hold)
: items_to_hold_(items_to_hold == 0 ? -1 : items_to_hold),
first_available_index_(0) {}
torch::Tensor RecyclingVector::At(int32_t index) const {
if (index < first_available_index_) {
KALDIFEAT_ERR << "Attempted to retrieve feature vector that was "
"already removed by the RecyclingVector (index = "
<< index << "; "
<< "first_available_index = " << first_available_index_
<< "; "
<< "size = " << Size() << ")";
}
// 'at' does size checking.
return items_.at(index - first_available_index_);
}
void RecyclingVector::PushBack(torch::Tensor item) {
// Note: -1 is a larger number when treated as unsigned
if (items_.size() == static_cast<size_t>(items_to_hold_)) {
items_.pop_front();
++first_available_index_;
}
items_.push_back(item);
}
int32_t RecyclingVector::Size() const {
return first_available_index_ + static_cast<int32_t>(items_.size());
}
template <class C>
OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
const typename C::Options &opts)
: computer_(opts),
window_function_(opts.frame_opts, opts.device),
features_(opts.frame_opts.max_feature_vectors),
input_finished_(false),
waveform_offset_(0) {}
template <class C>
void OnlineGenericBaseFeature<C>::AcceptWaveform(
float sampling_rate, const torch::Tensor &original_waveform) {
if (original_waveform.numel() == 0) return; // Nothing to do.
KALDIFEAT_ASSERT(original_waveform.dim() == 1);
KALDIFEAT_ASSERT(sampling_rate == computer_.GetFrameOptions().samp_freq);
if (input_finished_)
KALDIFEAT_ERR << "AcceptWaveform called after InputFinished() was called.";
if (waveform_remainder_.numel() == 0) {
waveform_remainder_ = original_waveform;
} else {
waveform_remainder_ =
torch::cat({waveform_remainder_, original_waveform}, /*dim*/ 0);
}
ComputeFeatures();
}
template <class C>
void OnlineGenericBaseFeature<C>::InputFinished() {
input_finished_ = true;
ComputeFeatures();
}
template <class C>
void OnlineGenericBaseFeature<C>::ComputeFeatures() {
const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
int64_t num_samples_total = waveform_offset_ + waveform_remainder_.numel();
int32_t num_frames_old = features_.Size();
int32_t num_frames_new =
NumFrames(num_samples_total, frame_opts, input_finished_);
KALDIFEAT_ASSERT(num_frames_new >= num_frames_old);
// note: this online feature-extraction code does not support VTLN.
float vtln_warp = 1.0;
for (int32_t frame = num_frames_old; frame < num_frames_new; ++frame) {
torch::Tensor window =
ExtractWindow(waveform_offset_, waveform_remainder_, frame, frame_opts);
// TODO(fangjun): We can compute all feature frames at once
torch::Tensor this_feature =
computer_.ComputeFeatures(window.unsqueeze(0), vtln_warp);
features_.PushBack(this_feature);
}
// OK, we will now discard any portion of the signal that will not be
// necessary to compute frames in the future.
int64_t first_sample_of_next_frame =
FirstSampleOfFrame(num_frames_new, frame_opts);
int32_t samples_to_discard = first_sample_of_next_frame - waveform_offset_;
if (samples_to_discard > 0) {
// discard the leftmost part of the waveform that we no longer need.
int32_t new_num_samples = waveform_remainder_.numel() - samples_to_discard;
if (new_num_samples <= 0) {
// odd, but we'll try to handle it.
waveform_offset_ += waveform_remainder_.numel();
waveform_remainder_.resize_({0});
} else {
using torch::indexing::None;
using torch::indexing::Slice;
waveform_remainder_ =
waveform_remainder_.index({Slice(samples_to_discard, None)});
waveform_offset_ += samples_to_discard;
}
}
}
// instantiate the templates defined here for MFCC, PLP and filterbank classes.
template class OnlineGenericBaseFeature<Mfcc>;
template class OnlineGenericBaseFeature<Plp>;
template class OnlineGenericBaseFeature<Fbank>;
} // namespace kaldifeat
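To make the discard arithmetic above concrete (illustrative numbers, not from the source): at 16 kHz with 25 ms frames and a 10 ms shift, frame_length is 400 samples and frame_shift is 160. If waveform_offset_ is 0 and waveform_remainder_ holds 1000 samples, then with the default snip_edges NumFrames() gives 1 + (1000 - 400) / 160 = 4 frames; FirstSampleOfFrame(4) = 4 * 160 = 640, so samples_to_discard = 640, the remainder shrinks to 360 samples, and waveform_offset_ becomes 640.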

View File

@ -1,127 +0,0 @@
// kaldifeat/csrc/online-feature.h
//
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
// This file is copied/modified from kaldi/src/feat/online-feature.h
#ifndef KALDIFEAT_CSRC_ONLINE_FEATURE_H_
#define KALDIFEAT_CSRC_ONLINE_FEATURE_H_
#include <deque>
#include "kaldifeat/csrc/feature-fbank.h"
#include "kaldifeat/csrc/feature-mfcc.h"
#include "kaldifeat/csrc/feature-plp.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/online-feature-itf.h"
namespace kaldifeat {
/// This class serves as a storage for feature vectors with an option to limit
/// the memory usage by removing old elements. The deleted frames indices are
/// "remembered" so that regardless of the MAX_ITEMS setting, the user always
/// provides the indices as if no deletion was being performed.
/// This is useful when processing very long recordings which would otherwise
/// cause the memory to eventually blow up when the features are not being
/// removed.
class RecyclingVector {
public:
/// By default it does not remove any elements.
explicit RecyclingVector(int32_t items_to_hold = -1);
~RecyclingVector() = default;
RecyclingVector(const RecyclingVector &) = delete;
RecyclingVector &operator=(const RecyclingVector &) = delete;
torch::Tensor At(int32_t index) const;
void PushBack(torch::Tensor item);
/// This method returns the size as if no "recycling" had happened,
/// i.e. equivalent to the number of times the PushBack method has been
/// called.
int32_t Size() const;
private:
std::deque<torch::Tensor> items_;
int32_t items_to_hold_;
int32_t first_available_index_;
};
/// This is a templated class for online feature extraction;
/// it's templated on a class like MfccComputer or PlpComputer
/// that does the basic feature extraction.
template <class C>
class OnlineGenericBaseFeature : public OnlineFeatureInterface {
public:
// Constructor from options class
explicit OnlineGenericBaseFeature(const typename C::Options &opts);
int32_t Dim() const override { return computer_.Dim(); }
float FrameShiftInSeconds() const override {
return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
}
int32_t NumFramesReady() const override { return features_.Size(); }
// Note: IsLastFrame() will only ever return true if you have called
// InputFinished() (and this frame is the last frame).
bool IsLastFrame(int32_t frame) const override {
return input_finished_ && frame == NumFramesReady() - 1;
}
torch::Tensor GetFrame(int32_t frame) override { return features_.At(frame); }
// This would be called from the application, when you get
// more wave data. Note: the sampling_rate is only provided so
// the code can assert that it matches the sampling rate
// expected in the options.
void AcceptWaveform(float sampling_rate,
const torch::Tensor &waveform) override;
// InputFinished() tells the class you won't be providing any
// more waveform. This will help flush out the last frame or two
// of features, in the case where snip-edges == false; it also
// affects the return value of IsLastFrame().
void InputFinished() override;
private:
// This function computes any additional feature frames that it is possible to
// compute from 'waveform_remainder_', which at this point may contain more
// than just a remainder-sized quantity (because AcceptWaveform() appends to
// waveform_remainder_ before calling this function). It adds these feature
// frames to features_, and shifts off any now-unneeded samples of input from
// waveform_remainder_ while incrementing waveform_offset_ by the same amount.
void ComputeFeatures();
C computer_; // class that does the MFCC or PLP or filterbank computation
FeatureWindowFunction window_function_;
// features_ is the Mfcc or Plp or Fbank features that we have already
// computed.
RecyclingVector features_;
// True if the user has called "InputFinished()"
bool input_finished_;
// waveform_offset_ is the number of samples of waveform that we have
// already discarded, i.e. that were prior to 'waveform_remainder_'.
int64_t waveform_offset_;
// waveform_remainder_ is a short piece of waveform that we may need to keep
// after extracting all the whole frames we can (whatever length of feature
// will be required for the next phase of computation).
// It is a 1-D tensor
torch::Tensor waveform_remainder_;
};
using OnlineMfcc = OnlineGenericBaseFeature<Mfcc>;
using OnlinePlp = OnlineGenericBaseFeature<Plp>;
using OnlineFbank = OnlineGenericBaseFeature<Fbank>;
} // namespace kaldifeat
#endif // KALDIFEAT_CSRC_ONLINE_FEATURE_H_

View File

@ -20,7 +20,7 @@
#include <string>
#include "torch/script.h"
#include "torch/torch.h"
namespace kaldifeat {

View File

@ -2,8 +2,7 @@
//
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
#include "torch/all.h"
#include "torch/script.h"
#include "torch/torch.h"
static void TestPreemph() {
torch::Tensor a = torch::arange(0, 12).reshape({3, 4}).to(torch::kFloat);

View File

@ -1,88 +0,0 @@
/**
* Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
*
* See LICENSE for clarification regarding multiple authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "kaldifeat/csrc/whisper-fbank.h"
#include <cmath>
#include <vector>
#include "kaldifeat/csrc/mel-computations.h"
#include "kaldifeat/csrc/whisper-mel-bank.h"
#include "kaldifeat/csrc/whisper-v3-mel-bank.h"
#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif
namespace kaldifeat {
WhisperFbankComputer::WhisperFbankComputer(const WhisperFbankOptions &opts)
: opts_(opts) {
if (opts.num_mels == 80) {
mel_banks_ = std::make_unique<MelBanks>(kWhisperMelArray, kWhisperMelRows,
kWhisperMelCols, opts.device);
} else if (opts.num_mels == 128) {
mel_banks_ = std::make_unique<MelBanks>(
kWhisperV3MelArray, kWhisperV3MelRows, kWhisperV3MelCols, opts.device);
} else {
KALDIFEAT_ERR << "Unsupported num_mels: " << opts.num_mels
<< ". Support only 80 and 128";
}
opts_.frame_opts.samp_freq = 16000;
opts_.frame_opts.frame_shift_ms = 10;
opts_.frame_opts.frame_length_ms = 25;
opts_.frame_opts.dither = 0;
opts_.frame_opts.preemph_coeff = 0;
opts_.frame_opts.remove_dc_offset = false;
opts_.frame_opts.window_type = "hann";
opts_.frame_opts.round_to_power_of_two = false;
opts_.frame_opts.snip_edges = false;
}
torch::Tensor WhisperFbankComputer::Compute(
torch::Tensor /*signal_raw_log_energy*/, float /*vtln_warp*/,
const torch::Tensor &signal_frame) {
KALDIFEAT_ASSERT(signal_frame.dim() == 2);
KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize());
// note spectrum is in magnitude, not power, because of `abs()`
#if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
// signal_frame shape: [x, 400] (25 ms at 16 kHz; round_to_power_of_two is false)
// power shape [x, 201]
torch::Tensor power = torch::fft::rfft(signal_frame).abs().pow(2);
#else
// signal_frame shape [x, 400]
// real_imag shape [x, 201, 2],
// where [..., 0] is the real part
// [..., 1] is the imaginary part
torch::Tensor real_imag = torch::rfft(signal_frame, 1);
torch::Tensor real = real_imag.index({"...", 0});
torch::Tensor imag = real_imag.index({"...", 1});
torch::Tensor power = (real.square() + imag.square());
#endif
torch::Tensor mel_energies = mel_banks_->Compute(power);
torch::Tensor log_spec = torch::clamp_min(mel_energies, 1e-10).log10();
log_spec = torch::maximum(log_spec, log_spec.max() - 8.0);
torch::Tensor mel = (log_spec + 4.0) / 4.0;
return mel;
}
} // namespace kaldifeat

View File

@ -1,78 +0,0 @@
/**
* Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
*
* See LICENSE for clarification regarding multiple authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef KALDIFEAT_CSRC_WHISPER_FBANK_H_
#define KALDIFEAT_CSRC_WHISPER_FBANK_H_
#include <memory>
#include <string>
#include <vector>
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/mel-computations.h"
namespace kaldifeat {
struct WhisperFbankOptions {
FrameExtractionOptions frame_opts;
// for large v3, please use 128
int32_t num_mels = 80;
torch::Device device{"cpu"};
std::string ToString() const {
std::ostringstream os;
os << "WhisperFbankOptions(";
os << "frame_opts=" << frame_opts.ToString() << ", ";
os << "num_mels=" << num_mels << ", ";
os << "device=\"" << device << "\")";
return os.str();
}
};
class WhisperFbankComputer {
public:
// note: Only frame_opts.device is used. All other fields from frame_opts
// are ignored
explicit WhisperFbankComputer(const WhisperFbankOptions &opts = {});
int32_t Dim() const { return opts_.num_mels; }
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
const WhisperFbankOptions &GetOptions() const { return opts_; }
torch::Tensor Compute(torch::Tensor /*signal_raw_log_energy*/,
float /*vtln_warp*/, const torch::Tensor &signal_frame);
// if true, compute log_energy_pre_window but after dithering and dc removal
bool NeedRawLogEnergy() const { return false; }
using Options = WhisperFbankOptions;
private:
WhisperFbankOptions opts_;
std::unique_ptr<MelBanks> mel_banks_;
};
using WhisperFbank = OfflineFeatureTpl<WhisperFbankComputer>;
} // namespace kaldifeat
#endif // KALDIFEAT_CSRC_WHISPER_FBANK_H_
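A hedged usage sketch of the WhisperFbank type declared above (ComputeWhisperFeatures is a hypothetical wrapper; the exact constructor and the signature of ComputeFeatures come from OfflineFeatureTpl and may differ slightly):

// Sketch only: Whisper-style fbank features for one 16 kHz utterance.
#include "kaldifeat/csrc/whisper-fbank.h"

torch::Tensor ComputeWhisperFeatures(const torch::Tensor &wave /* 1-D float, 16 kHz */) {
  kaldifeat::WhisperFbankOptions opts;
  opts.num_mels = 80;  // use 128 for whisper large-v3
  opts.device = torch::Device("cpu");
  kaldifeat::WhisperFbank fbank(opts);
  return fbank.ComputeFeatures(wave, /*vtln_warp=*/1.0f);  // vtln_warp is ignored here
}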

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,5 +1 @@
add_subdirectory(csrc)
if(kaldifeat_BUILD_TESTS)
add_subdirectory(tests)
endif()

View File

@ -7,34 +7,6 @@ pybind11_add_module(_kaldifeat
feature-window.cc
kaldifeat.cc
mel-computations.cc
online-feature.cc
utils.cc
whisper-fbank.cc
)
if(APPLE)
execute_process(
COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
)
message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
endif()
if(NOT WIN32)
target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${kaldifeat_rpath_origin}/kaldifeat/${CMAKE_INSTALL_LIBDIR}")
endif()
target_link_libraries(_kaldifeat PRIVATE kaldifeat_core)
if(UNIX AND NOT APPLE)
target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/libtorch_python.so)
# target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARY})
elseif(WIN32)
target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/torch_python.lib)
# target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARIES})
endif()
install(TARGETS _kaldifeat
DESTINATION ../
)
target_link_libraries(_kaldifeat PRIVATE ${TORCH_DIR}/lib/libtorch_python.so)

View File

@ -4,11 +4,9 @@
#include "kaldifeat/python/csrc/feature-fbank.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/feature-fbank.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
@ -16,35 +14,6 @@ static void PybindFbankOptions(py::module &m) {
using PyClass = FbankOptions;
py::class_<PyClass>(m, "FbankOptions")
.def(py::init<>())
.def(py::init([](const MelBanksOptions &mel_opts,
const FrameExtractionOptions &frame_opts =
FrameExtractionOptions(),
bool use_energy = false, float energy_floor = 0.0f,
bool raw_energy = true, bool htk_compat = false,
bool use_log_fbank = true, bool use_power = true,
py::object device =
py::str("cpu")) -> std::unique_ptr<FbankOptions> {
auto opts = std::make_unique<FbankOptions>();
opts->frame_opts = frame_opts;
opts->mel_opts = mel_opts;
opts->use_energy = use_energy;
opts->energy_floor = energy_floor;
opts->raw_energy = raw_energy;
opts->htk_compat = htk_compat;
opts->use_log_fbank = use_log_fbank;
opts->use_power = use_power;
std::string s = static_cast<py::str>(device);
opts->device = torch::Device(s);
return opts;
}),
py::arg("mel_opts"),
py::arg("frame_opts") = FrameExtractionOptions(),
py::arg("use_energy") = false, py::arg("energy_floor") = 0.0f,
py::arg("raw_energy") = true, py::arg("htk_compat") = false,
py::arg("use_log_fbank") = true, py::arg("use_power") = true,
py::arg("device") = py::str("cpu"))
.def_readwrite("frame_opts", &PyClass::frame_opts)
.def_readwrite("mel_opts", &PyClass::mel_opts)
.def_readwrite("use_energy", &PyClass::use_energy)
@ -64,15 +33,7 @@ static void PybindFbankOptions(py::module &m) {
self.device = torch::Device(s);
})
.def("__str__",
[](const PyClass &self) -> std::string { return self.ToString(); })
.def("as_dict",
[](const PyClass &self) -> py::dict { return AsDict(self); })
.def_static(
"from_dict",
[](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
.def(py::pickle(
[](const PyClass &self) -> py::dict { return AsDict(self); },
[](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
[](const PyClass &self) -> std::string { return self.ToString(); });
}
static void PybindFbank(py::module &m) {
@ -82,14 +43,7 @@ static void PybindFbank(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
},
[](py::dict dict) -> std::unique_ptr<PyClass> {
return std::make_unique<PyClass>(FbankOptionsFromDict(dict));
}));
py::arg("vtln_warp"));
}
void PybindFeatureFbank(py::module &m) {

View File

@ -4,11 +4,9 @@
#include "kaldifeat/python/csrc/feature-mfcc.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/feature-mfcc.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
@ -16,35 +14,6 @@ void PybindMfccOptions(py::module &m) {
using PyClass = MfccOptions;
py::class_<PyClass>(m, "MfccOptions")
.def(py::init<>())
.def(py::init([](const MelBanksOptions &mel_opts,
const FrameExtractionOptions &frame_opts =
FrameExtractionOptions(),
int32_t num_ceps = 13, bool use_energy = true,
float energy_floor = 0.0, bool raw_energy = true,
float cepstral_lifter = 22.0, bool htk_compat = false,
py::object device =
py::str("cpu")) -> std::unique_ptr<MfccOptions> {
auto opts = std::make_unique<MfccOptions>();
opts->frame_opts = frame_opts;
opts->mel_opts = mel_opts;
opts->num_ceps = num_ceps;
opts->use_energy = use_energy;
opts->energy_floor = energy_floor;
opts->raw_energy = raw_energy;
opts->cepstral_lifter = cepstral_lifter;
opts->htk_compat = htk_compat;
std::string s = static_cast<py::str>(device);
opts->device = torch::Device(s);
return opts;
}),
py::arg("mel_opts"),
py::arg("frame_opts") = FrameExtractionOptions(),
py::arg("num_ceps") = 13, py::arg("use_energy") = true,
py::arg("energy_floor") = 0.0f, py::arg("raw_energy") = true,
py::arg("cepstral_lifter") = 22.0, py::arg("htk_compat") = false,
py::arg("device") = py::str("cpu"))
.def_readwrite("frame_opts", &PyClass::frame_opts)
.def_readwrite("mel_opts", &PyClass::mel_opts)
.def_readwrite("num_ceps", &PyClass::num_ceps)
@ -64,15 +33,7 @@ void PybindMfccOptions(py::module &m) {
self.device = torch::Device(s);
})
.def("__str__",
[](const PyClass &self) -> std::string { return self.ToString(); })
.def("as_dict",
[](const PyClass &self) -> py::dict { return AsDict(self); })
.def_static(
"from_dict",
[](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); })
.def(py::pickle(
[](const PyClass &self) -> py::dict { return AsDict(self); },
[](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); }));
[](const PyClass &self) -> std::string { return self.ToString(); });
}
static void PybindMfcc(py::module &m) {
@ -82,14 +43,7 @@ static void PybindMfcc(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
},
[](py::dict dict) -> std::unique_ptr<PyClass> {
return std::make_unique<PyClass>(MfccOptionsFromDict(dict));
}));
py::arg("vtln_warp"));
}
void PybindFeatureMfcc(py::module &m) {

View File

@ -4,11 +4,9 @@
#include "kaldifeat/python/csrc/feature-plp.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/feature-plp.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
@ -16,41 +14,6 @@ void PybindPlpOptions(py::module &m) {
using PyClass = PlpOptions;
py::class_<PyClass>(m, "PlpOptions")
.def(py::init<>())
.def(py::init([](const MelBanksOptions &mel_opts,
const FrameExtractionOptions &frame_opts =
FrameExtractionOptions(),
int32_t lpc_order = 12, int32_t num_ceps = 13,
bool use_energy = true, float energy_floor = 0.0,
bool raw_energy = true, float compress_factor = 0.33333,
int32_t cepstral_lifter = 22, float cepstral_scale = 1.0,
bool htk_compat = false,
py::object device =
py::str("cpu")) -> std::unique_ptr<PlpOptions> {
auto opts = std::make_unique<PlpOptions>();
opts->frame_opts = frame_opts;
opts->mel_opts = mel_opts;
opts->lpc_order = lpc_order;
opts->num_ceps = num_ceps;
opts->use_energy = use_energy;
opts->energy_floor = energy_floor;
opts->raw_energy = raw_energy;
opts->compress_factor = compress_factor;
opts->cepstral_lifter = cepstral_lifter;
opts->cepstral_scale = cepstral_scale;
opts->htk_compat = htk_compat;
std::string s = static_cast<py::str>(device);
opts->device = torch::Device(s);
return opts;
}),
py::arg("mel_opts"),
py::arg("frame_opts") = FrameExtractionOptions(),
py::arg("lpc_order") = 12, py::arg("num_ceps") = 13,
py::arg("use_energy") = true, py::arg("energy_floor") = 0.0,
py::arg("raw_energy") = true, py::arg("compress_factor") = 0.33333,
py::arg("cepstral_lifter") = 22, py::arg("cepstral_scale") = 1.0,
py::arg("htk_compat") = false, py::arg("device") = py::str("cpu"))
.def_readwrite("frame_opts", &PyClass::frame_opts)
.def_readwrite("mel_opts", &PyClass::mel_opts)
.def_readwrite("lpc_order", &PyClass::lpc_order)
@ -73,15 +36,7 @@ void PybindPlpOptions(py::module &m) {
self.device = torch::Device(s);
})
.def("__str__",
[](const PyClass &self) -> std::string { return self.ToString(); })
.def("as_dict",
[](const PyClass &self) -> py::dict { return AsDict(self); })
.def_static(
"from_dict",
[](py::dict dict) -> PyClass { return PlpOptionsFromDict(dict); })
.def(py::pickle(
[](const PyClass &self) -> py::dict { return AsDict(self); },
[](py::dict dict) -> PyClass { return PlpOptionsFromDict(dict); }));
[](const PyClass &self) -> std::string { return self.ToString(); });
}
static void PybindPlp(py::module &m) {
@ -91,14 +46,7 @@ static void PybindPlp(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
},
[](py::dict dict) -> std::unique_ptr<PyClass> {
return std::make_unique<PyClass>(PlpOptionsFromDict(dict));
}));
py::arg("vtln_warp"));
}
void PybindFeaturePlp(py::module &m) {

View File

@ -4,38 +4,16 @@
#include "kaldifeat/python/csrc/feature-spectrogram.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/feature-spectrogram.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
static void PybindSpectrogramOptions(py::module &m) {
using PyClass = SpectrogramOptions;
py::class_<PyClass>(m, "SpectrogramOptions")
.def(py::init([](const FrameExtractionOptions &frame_opts =
FrameExtractionOptions(),
float energy_floor = 0.0, bool raw_energy = true,
bool return_raw_fft = false,
py::object device = py::str(
"cpu")) -> std::unique_ptr<SpectrogramOptions> {
auto opts = std::make_unique<SpectrogramOptions>();
opts->frame_opts = frame_opts;
opts->energy_floor = energy_floor;
opts->raw_energy = raw_energy;
opts->return_raw_fft = return_raw_fft;
std::string s = static_cast<py::str>(device);
opts->device = torch::Device(s);
return opts;
}),
py::arg("frame_opts") = FrameExtractionOptions(),
py::arg("energy_floor") = 0.0, py::arg("raw_energy") = true,
py::arg("return_raw_fft") = false,
py::arg("device") = py::str("cpu"))
.def(py::init<>())
.def_readwrite("frame_opts", &PyClass::frame_opts)
.def_readwrite("energy_floor", &PyClass::energy_floor)
.def_readwrite("raw_energy", &PyClass::raw_energy)
@ -52,18 +30,7 @@ static void PybindSpectrogramOptions(py::module &m) {
self.device = torch::Device(s);
})
.def("__str__",
[](const PyClass &self) -> std::string { return self.ToString(); })
.def("as_dict",
[](const PyClass &self) -> py::dict { return AsDict(self); })
.def_static("from_dict",
[](py::dict dict) -> PyClass {
return SpectrogramOptionsFromDict(dict);
})
.def(py::pickle(
[](const PyClass &self) -> py::dict { return AsDict(self); },
[](py::dict dict) -> PyClass {
return SpectrogramOptionsFromDict(dict);
}));
[](const PyClass &self) -> std::string { return self.ToString(); });
}
static void PybindSpectrogram(py::module &m) {
@ -73,14 +40,7 @@ static void PybindSpectrogram(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
},
[](py::dict dict) -> std::unique_ptr<PyClass> {
return std::make_unique<PyClass>(SpectrogramOptionsFromDict(dict));
}));
py::arg("vtln_warp"));
}
void PybindFeatureSpectrogram(py::module &m) {

View File

@ -4,78 +4,38 @@
#include "kaldifeat/python/csrc/feature-window.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
static void PybindFrameExtractionOptions(py::module &m) {
using PyClass = FrameExtractionOptions;
py::class_<PyClass>(m, "FrameExtractionOptions")
.def(
py::init([](float samp_freq = 16000, float frame_shift_ms = 10.0f,
float frame_length_ms = 25.0f, float dither = 1.0f,
float preemph_coeff = 0.97f, bool remove_dc_offset = true,
const std::string &window_type = "povey",
bool round_to_power_of_two = true,
float blackman_coeff = 0.42f, bool snip_edges = true,
int32_t max_feature_vectors =
-1) -> std::unique_ptr<FrameExtractionOptions> {
auto opts = std::make_unique<FrameExtractionOptions>();
opts->samp_freq = samp_freq;
opts->frame_shift_ms = frame_shift_ms;
opts->frame_length_ms = frame_length_ms;
opts->dither = dither;
opts->preemph_coeff = preemph_coeff;
opts->remove_dc_offset = remove_dc_offset;
opts->window_type = window_type;
opts->round_to_power_of_two = round_to_power_of_two;
opts->blackman_coeff = blackman_coeff;
opts->snip_edges = snip_edges;
opts->max_feature_vectors = max_feature_vectors;
return opts;
}),
py::arg("samp_freq") = 16000, py::arg("frame_shift_ms") = 10.0f,
py::arg("frame_length_ms") = 25.0f, py::arg("dither") = 1.0f,
py::arg("preemph_coeff") = 0.97f, py::arg("remove_dc_offset") = true,
py::arg("window_type") = "povey",
py::arg("round_to_power_of_two") = true,
py::arg("blackman_coeff") = 0.42f, py::arg("snip_edges") = true,
py::arg("max_feature_vectors") = -1)
.def_readwrite("samp_freq", &PyClass::samp_freq)
.def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
.def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
.def_readwrite("dither", &PyClass::dither)
.def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
.def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
.def_readwrite("window_type", &PyClass::window_type)
.def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
.def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
.def_readwrite("snip_edges", &PyClass::snip_edges)
.def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors)
.def("as_dict",
[](const PyClass &self) -> py::dict { return AsDict(self); })
.def_static("from_dict",
[](py::dict dict) -> PyClass {
return FrameExtractionOptionsFromDict(dict);
})
py::class_<FrameExtractionOptions>(m, "FrameExtractionOptions")
.def(py::init<>())
.def_readwrite("samp_freq", &FrameExtractionOptions::samp_freq)
.def_readwrite("frame_shift_ms", &FrameExtractionOptions::frame_shift_ms)
.def_readwrite("frame_length_ms",
&FrameExtractionOptions::frame_length_ms)
.def_readwrite("dither", &FrameExtractionOptions::dither)
.def_readwrite("preemph_coeff", &FrameExtractionOptions::preemph_coeff)
.def_readwrite("remove_dc_offset",
&FrameExtractionOptions::remove_dc_offset)
.def_readwrite("window_type", &FrameExtractionOptions::window_type)
.def_readwrite("round_to_power_of_two",
&FrameExtractionOptions::round_to_power_of_two)
.def_readwrite("blackman_coeff", &FrameExtractionOptions::blackman_coeff)
.def_readwrite("snip_edges", &FrameExtractionOptions::snip_edges)
#if 0
.def_readwrite("allow_downsample",
&PyClass::allow_downsample)
.def_readwrite("allow_upsample", &PyClass::allow_upsample)
&FrameExtractionOptions::allow_downsample)
.def_readwrite("allow_upsample", &FrameExtractionOptions::allow_upsample)
.def_readwrite("max_feature_vectors",
&FrameExtractionOptions::max_feature_vectors)
#endif
.def("__str__",
[](const PyClass &self) -> std::string { return self.ToString(); })
.def(py::pickle(
[](const PyClass &self) -> py::dict { return AsDict(self); },
[](py::dict dict) -> PyClass {
return FrameExtractionOptionsFromDict(dict);
}));
.def("__str__", [](const FrameExtractionOptions &self) -> std::string {
return self.ToString();
});
m.def("num_frames", &NumFrames, py::arg("num_samples"), py::arg("opts"),
py::arg("flush") = true);

View File

@ -11,8 +11,6 @@
#include "kaldifeat/python/csrc/feature-spectrogram.h"
#include "kaldifeat/python/csrc/feature-window.h"
#include "kaldifeat/python/csrc/mel-computations.h"
#include "kaldifeat/python/csrc/online-feature.h"
#include "kaldifeat/python/csrc/whisper-fbank.h"
#include "torch/torch.h"
namespace kaldifeat {
@ -23,11 +21,9 @@ PYBIND11_MODULE(_kaldifeat, m) {
PybindFeatureWindow(m);
PybindMelComputations(m);
PybindFeatureFbank(m);
PybindWhisperFbank(&m);
PybindFeatureMfcc(m);
PybindFeaturePlp(m);
PybindFeatureSpectrogram(m);
PybindOnlineFeature(m);
}
} // namespace kaldifeat

View File

@ -6,7 +6,6 @@
#define KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_
#include "pybind11/pybind11.h"
#include "torch/torch.h"
namespace py = pybind11;
#endif // KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_

View File

@ -4,35 +4,16 @@
#include "kaldifeat/python/csrc/mel-computations.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/mel-computations.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
static void PybindMelBanksOptions(py::module &m) {
using PyClass = MelBanksOptions;
py::class_<PyClass>(m, "MelBanksOptions")
.def(py::init(
[](int32_t num_bins = 25, float low_freq = 20,
float high_freq = 0, float vtln_low = 100,
float vtln_high = -500,
bool debug_mel = false) -> std::unique_ptr<MelBanksOptions> {
auto opts = std::make_unique<MelBanksOptions>();
opts->num_bins = num_bins;
opts->low_freq = low_freq;
opts->high_freq = high_freq;
opts->vtln_low = vtln_low;
opts->vtln_high = vtln_high;
return opts;
}),
py::arg("num_bins") = 25, py::arg("low_freq") = 20,
py::arg("high_freq") = 0, py::arg("vtln_low") = 100,
py::arg("vtln_high") = -500, py::arg("debug_mel") = false)
.def(py::init<>())
.def_readwrite("num_bins", &PyClass::num_bins)
.def_readwrite("low_freq", &PyClass::low_freq)
.def_readwrite("high_freq", &PyClass::high_freq)
@ -41,18 +22,7 @@ static void PybindMelBanksOptions(py::module &m) {
.def_readwrite("debug_mel", &PyClass::debug_mel)
.def_readwrite("htk_mode", &PyClass::htk_mode)
.def("__str__",
[](const PyClass &self) -> std::string { return self.ToString(); })
.def("as_dict",
[](const PyClass &self) -> py::dict { return AsDict(self); })
.def_static("from_dict",
[](py::dict dict) -> PyClass {
return MelBanksOptionsFromDict(dict);
})
.def(py::pickle(
[](const PyClass &self) -> py::dict { return AsDict(self); },
[](py::dict dict) -> PyClass {
return MelBanksOptionsFromDict(dict);
}));
}
void PybindMelComputations(py::module &m) { PybindMelBanksOptions(m); }
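
A hedged sketch of what the keyword constructor above allows from Python; the field names and defaults come from the binding, everything else is an assumption:

import _kaldifeat

# Unspecified keywords keep the defaults declared in the binding above.
mel_opts = _kaldifeat.MelBanksOptions(num_bins=80)
print(mel_opts)           # __str__ -> ToString()
print(mel_opts.low_freq)  # 20, the default from the binding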

View File

@ -1,39 +0,0 @@
// kaldifeat/python/csrc/online-feature.cc
//
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
#include "kaldifeat/python/csrc/online-feature.h"
#include <string>
#include "kaldifeat/csrc/online-feature.h"
namespace kaldifeat {
template <typename C>
void PybindOnlineFeatureTpl(py::module &m, const std::string &class_name,
const std::string &class_help_doc = "") {
using PyClass = OnlineGenericBaseFeature<C>;
using Options = typename C::Options;
py::class_<PyClass>(m, class_name.c_str(), class_help_doc.c_str())
.def(py::init<const Options &>(), py::arg("opts"))
.def_property_readonly("dim", &PyClass::Dim)
.def_property_readonly("frame_shift_in_seconds",
&PyClass::FrameShiftInSeconds)
.def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
.def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
.def("get_frame", &PyClass::GetFrame, py::arg("frame"))
.def("get_frames", &PyClass::GetFrames, py::arg("frames"),
py::call_guard<py::gil_scoped_release>())
.def("accept_waveform", &PyClass::AcceptWaveform,
py::arg("sampling_rate"), py::arg("waveform"),
py::call_guard<py::gil_scoped_release>())
.def("input_finished", &PyClass::InputFinished);
}
void PybindOnlineFeature(py::module &m) {
PybindOnlineFeatureTpl<Mfcc>(m, "OnlineMfcc");
PybindOnlineFeatureTpl<Fbank>(m, "OnlineFbank");
PybindOnlineFeatureTpl<Plp>(m, "OnlinePlp");
}
} // namespace kaldifeat
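
A hedged Python sketch of driving the streaming interface bound above; the waveform tensor and the option defaults are assumptions, while the methods and properties are the ones registered in this file:

import torch

import _kaldifeat

opts = _kaldifeat.FbankOptions()  # default options; 16 kHz assumed for this sketch

fbank = _kaldifeat.OnlineFbank(opts)

samples = torch.rand(16000)  # hypothetical 1-D float32 tensor, 1 s of audio
fbank.accept_waveform(sampling_rate=16000, waveform=samples)
fbank.input_finished()  # no more audio will arrive

frames = [fbank.get_frame(i) for i in range(fbank.num_frames_ready)]
print(fbank.dim, len(frames), fbank.frame_shift_in_seconds)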

View File

@ -1,16 +0,0 @@
// kaldifeat/python/csrc/online-feature.h
//
// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
#ifndef KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
#define KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
#include "kaldifeat/python/csrc/kaldifeat.h"
namespace kaldifeat {
void PybindOnlineFeature(py::module &m);
} // namespace kaldifeat
#endif // KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_

View File

@ -1,284 +0,0 @@
// kaldifeat/python/csrc/utils.cc
//
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
#include "kaldifeat/python/csrc/utils.h"
#include <string>
#include "kaldifeat/csrc/feature-window.h"
#define FROM_DICT(type, key) \
if (dict.contains(#key)) { \
opts.key = py::type(dict[#key]); \
}
#define AS_DICT(key) dict[#key] = opts.key
namespace kaldifeat {
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
FrameExtractionOptions opts;
FROM_DICT(float_, samp_freq);
FROM_DICT(float_, frame_shift_ms);
FROM_DICT(float_, frame_length_ms);
FROM_DICT(float_, dither);
FROM_DICT(float_, preemph_coeff);
FROM_DICT(bool_, remove_dc_offset);
FROM_DICT(str, window_type);
FROM_DICT(bool_, round_to_power_of_two);
FROM_DICT(float_, blackman_coeff);
FROM_DICT(bool_, snip_edges);
FROM_DICT(int_, max_feature_vectors);
return opts;
}
py::dict AsDict(const FrameExtractionOptions &opts) {
py::dict dict;
AS_DICT(samp_freq);
AS_DICT(frame_shift_ms);
AS_DICT(frame_length_ms);
AS_DICT(dither);
AS_DICT(preemph_coeff);
AS_DICT(remove_dc_offset);
AS_DICT(window_type);
AS_DICT(round_to_power_of_two);
AS_DICT(blackman_coeff);
AS_DICT(snip_edges);
AS_DICT(max_feature_vectors);
return dict;
}
MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
MelBanksOptions opts;
FROM_DICT(int_, num_bins);
FROM_DICT(float_, low_freq);
FROM_DICT(float_, high_freq);
FROM_DICT(float_, vtln_low);
FROM_DICT(float_, vtln_high);
FROM_DICT(bool_, debug_mel);
FROM_DICT(bool_, htk_mode);
return opts;
}
py::dict AsDict(const MelBanksOptions &opts) {
py::dict dict;
AS_DICT(num_bins);
AS_DICT(low_freq);
AS_DICT(high_freq);
AS_DICT(vtln_low);
AS_DICT(vtln_high);
AS_DICT(debug_mel);
AS_DICT(htk_mode);
return dict;
}
FbankOptions FbankOptionsFromDict(py::dict dict) {
FbankOptions opts;
if (dict.contains("frame_opts")) {
opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
}
if (dict.contains("mel_opts")) {
opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
}
FROM_DICT(bool_, use_energy);
FROM_DICT(float_, energy_floor);
FROM_DICT(bool_, raw_energy);
FROM_DICT(bool_, htk_compat);
FROM_DICT(bool_, use_log_fbank);
FROM_DICT(bool_, use_power);
if (dict.contains("device")) {
opts.device = torch::Device(std::string(py::str(dict["device"])));
}
return opts;
}
py::dict AsDict(const FbankOptions &opts) {
py::dict dict;
dict["frame_opts"] = AsDict(opts.frame_opts);
dict["mel_opts"] = AsDict(opts.mel_opts);
AS_DICT(use_energy);
AS_DICT(energy_floor);
AS_DICT(raw_energy);
AS_DICT(htk_compat);
AS_DICT(use_log_fbank);
AS_DICT(use_power);
auto torch_device = py::module_::import("torch").attr("device");
dict["device"] = torch_device(opts.device.str());
return dict;
}
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict) {
WhisperFbankOptions opts;
if (dict.contains("frame_opts")) {
opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
}
FROM_DICT(int_, num_mels);
if (dict.contains("device")) {
opts.device = torch::Device(std::string(py::str(dict["device"])));
}
return opts;
}
py::dict AsDict(const WhisperFbankOptions &opts) {
py::dict dict;
dict["frame_opts"] = AsDict(opts.frame_opts);
AS_DICT(num_mels);
auto torch_device = py::module_::import("torch").attr("device");
dict["device"] = torch_device(opts.device.str());
return dict;
}
MfccOptions MfccOptionsFromDict(py::dict dict) {
MfccOptions opts;
if (dict.contains("frame_opts")) {
opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
}
if (dict.contains("mel_opts")) {
opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
}
FROM_DICT(int_, num_ceps);
FROM_DICT(bool_, use_energy);
FROM_DICT(float_, energy_floor);
FROM_DICT(bool_, raw_energy);
FROM_DICT(float_, cepstral_lifter);
FROM_DICT(bool_, htk_compat);
if (dict.contains("device")) {
opts.device = torch::Device(std::string(py::str(dict["device"])));
}
return opts;
}
py::dict AsDict(const MfccOptions &opts) {
py::dict dict;
dict["frame_opts"] = AsDict(opts.frame_opts);
dict["mel_opts"] = AsDict(opts.mel_opts);
AS_DICT(num_ceps);
AS_DICT(use_energy);
AS_DICT(energy_floor);
AS_DICT(raw_energy);
AS_DICT(cepstral_lifter);
AS_DICT(htk_compat);
auto torch_device = py::module_::import("torch").attr("device");
dict["device"] = torch_device(opts.device.str());
return dict;
}
SpectrogramOptions SpectrogramOptionsFromDict(py::dict dict) {
SpectrogramOptions opts;
if (dict.contains("frame_opts")) {
opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
}
FROM_DICT(float_, energy_floor);
FROM_DICT(bool_, raw_energy);
// FROM_DICT(bool_, return_raw_fft);
if (dict.contains("device")) {
opts.device = torch::Device(std::string(py::str(dict["device"])));
}
return opts;
}
py::dict AsDict(const SpectrogramOptions &opts) {
py::dict dict;
dict["frame_opts"] = AsDict(opts.frame_opts);
AS_DICT(energy_floor);
AS_DICT(raw_energy);
auto torch_device = py::module_::import("torch").attr("device");
dict["device"] = torch_device(opts.device.str());
return dict;
}
PlpOptions PlpOptionsFromDict(py::dict dict) {
PlpOptions opts;
if (dict.contains("frame_opts")) {
opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
}
if (dict.contains("mel_opts")) {
opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
}
FROM_DICT(int_, lpc_order);
FROM_DICT(int_, num_ceps);
FROM_DICT(bool_, use_energy);
FROM_DICT(float_, energy_floor);
FROM_DICT(bool_, raw_energy);
FROM_DICT(float_, compress_factor);
  FROM_DICT(float_, cepstral_lifter);
FROM_DICT(float_, cepstral_scale);
FROM_DICT(bool_, htk_compat);
if (dict.contains("device")) {
opts.device = torch::Device(std::string(py::str(dict["device"])));
}
return opts;
}
py::dict AsDict(const PlpOptions &opts) {
py::dict dict;
dict["frame_opts"] = AsDict(opts.frame_opts);
dict["mel_opts"] = AsDict(opts.mel_opts);
AS_DICT(lpc_order);
AS_DICT(num_ceps);
AS_DICT(use_energy);
AS_DICT(energy_floor);
AS_DICT(raw_energy);
AS_DICT(compress_factor);
AS_DICT(cepstral_lifter);
AS_DICT(cepstral_scale);
AS_DICT(htk_compat);
auto torch_device = py::module_::import("torch").attr("device");
dict["device"] = torch_device(opts.device.str());
return dict;
}
#undef FROM_DICT
#undef AS_DICT
} // namespace kaldifeat

View File

@ -1,54 +0,0 @@
// kaldifeat/python/csrc/utils.h
//
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
#ifndef KALDIFEAT_PYTHON_CSRC_UTILS_H_
#define KALDIFEAT_PYTHON_CSRC_UTILS_H_
#include "kaldifeat/csrc/feature-fbank.h"
#include "kaldifeat/csrc/feature-mfcc.h"
#include "kaldifeat/csrc/feature-plp.h"
#include "kaldifeat/csrc/feature-spectrogram.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/mel-computations.h"
#include "kaldifeat/csrc/whisper-fbank.h"
#include "kaldifeat/python/csrc/kaldifeat.h"
/*
* This file contains code about `from_dict` and
* `as_dict` for various options in kaldifeat.
*
* Regarding `from_dict`, users don't need to provide
 * all the fields in the options. Fields that are not
 * provided keep their default values.
*
* If the provided dict in `from_dict` is empty,
* all fields use their default values.
*/
namespace kaldifeat {
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
py::dict AsDict(const FrameExtractionOptions &opts);
MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
py::dict AsDict(const MelBanksOptions &opts);
FbankOptions FbankOptionsFromDict(py::dict dict);
py::dict AsDict(const FbankOptions &opts);
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
py::dict AsDict(const WhisperFbankOptions &opts);
MfccOptions MfccOptionsFromDict(py::dict dict);
py::dict AsDict(const MfccOptions &opts);
SpectrogramOptions SpectrogramOptionsFromDict(py::dict dict);
py::dict AsDict(const SpectrogramOptions &opts);
PlpOptions PlpOptionsFromDict(py::dict dict);
py::dict AsDict(const PlpOptions &opts);
} // namespace kaldifeat
#endif // KALDIFEAT_PYTHON_CSRC_UTILS_H_
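
The header comment above spells out the from_dict/as_dict contract; below is a minimal Python sketch of that behavior, using only an options class whose bindings appear earlier in this diff:

import _kaldifeat

# Partial dict: unspecified fields keep their defaults.
opts = _kaldifeat.MelBanksOptions.from_dict({"num_bins": 40})
assert opts.num_bins == 40
print(opts.low_freq)  # untouched default

# Empty dict: every field keeps its default value.
default_opts = _kaldifeat.MelBanksOptions.from_dict({})
state = opts.as_dict()  # round-trip back to a plain dict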

View File

@ -1,84 +0,0 @@
// kaldifeat/python/csrc/whisper-fbank.cc
//
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
#include "kaldifeat/python/csrc/whisper-fbank.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/whisper-fbank.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
static void PybindWhisperFbankOptions(py::module *m) {
using PyClass = WhisperFbankOptions;
py::class_<PyClass>(*m, "WhisperFbankOptions")
.def(py::init<>())
.def(py::init([](const FrameExtractionOptions &frame_opts =
FrameExtractionOptions(),
int32_t num_mels = 80,
py::object device = py::str(
"cpu")) -> std::unique_ptr<WhisperFbankOptions> {
auto opts = std::make_unique<WhisperFbankOptions>();
opts->frame_opts = frame_opts;
opts->num_mels = num_mels;
std::string s = static_cast<py::str>(device);
opts->device = torch::Device(s);
return opts;
}),
py::arg("frame_opts") = FrameExtractionOptions(),
py::arg("num_mels") = 80, py::arg("device") = py::str("cpu"))
.def_readwrite("frame_opts", &PyClass::frame_opts)
.def_readwrite("num_mels", &PyClass::num_mels)
.def_property(
"device",
[](const PyClass &self) -> py::object {
py::object ans = py::module_::import("torch").attr("device");
return ans(self.device.str());
},
[](PyClass &self, py::object obj) -> void {
std::string s = static_cast<py::str>(obj);
self.device = torch::Device(s);
})
.def("__str__",
[](const PyClass &self) -> std::string { return self.ToString(); })
.def("as_dict",
[](const PyClass &self) -> py::dict { return AsDict(self); })
.def_static("from_dict",
[](py::dict dict) -> PyClass {
return WhisperFbankOptionsFromDict(dict);
})
.def(py::pickle(
[](const PyClass &self) -> py::dict { return AsDict(self); },
[](py::dict dict) -> PyClass {
return WhisperFbankOptionsFromDict(dict);
}));
}
static void PybindWhisperFbankImpl(py::module *m) {
using PyClass = WhisperFbank;
py::class_<PyClass>(*m, "WhisperFbank")
.def(py::init<const WhisperFbankOptions &>(), py::arg("opts"))
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
},
[](py::dict dict) -> std::unique_ptr<PyClass> {
return std::make_unique<PyClass>(WhisperFbankOptionsFromDict(dict));
}));
}
void PybindWhisperFbank(py::module *m) {
PybindWhisperFbankOptions(m);
PybindWhisperFbankImpl(m);
}
} // namespace kaldifeat
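
A hedged sketch of calling these Whisper fbank bindings from Python; the input tensor is an assumption, the names come from the bindings above:

import torch

import _kaldifeat

opts = _kaldifeat.WhisperFbankOptions(num_mels=80, device="cpu")

whisper = _kaldifeat.WhisperFbank(opts)
print(whisper.dim())  # number of mel bins

wave = torch.rand(16000)  # hypothetical 1-D float32 tensor of audio samples
features = whisper.compute_features(wave, vtln_warp=1.0)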

View File

@ -1,16 +0,0 @@
// kaldifeat/python/csrc/whisper-fbank.h
//
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
#ifndef KALDIFEAT_PYTHON_CSRC_WHISPER_FBANK_H_
#define KALDIFEAT_PYTHON_CSRC_WHISPER_FBANK_H_
#include "kaldifeat/python/csrc/kaldifeat.h"
namespace kaldifeat {
void PybindWhisperFbank(py::module *m);
} // namespace kaldifeat
#endif // KALDIFEAT_PYTHON_CSRC_WHISPER_FBANK_H_

View File

@ -1,15 +1,4 @@
import torch
from .torch_version import kaldifeat_torch_version
if torch.__version__.split("+")[0] != kaldifeat_torch_version.split("+")[0]:
raise ImportError(
f"kaldifeat was built using PyTorch {kaldifeat_torch_version}\n"
f"But you are using PyTorch {torch.__version__} to run it"
)
from pathlib import Path as _Path
from _kaldifeat import (
FbankOptions,
FrameExtractionOptions,
@ -17,17 +6,9 @@ from _kaldifeat import (
MfccOptions,
PlpOptions,
SpectrogramOptions,
WhisperFbankOptions,
num_frames,
)
from .fbank import Fbank, OnlineFbank
from .mfcc import Mfcc, OnlineMfcc
from .offline_feature import OfflineFeature
from .online_feature import OnlineFeature
from .plp import OnlinePlp, Plp
from .fbank import Fbank
from .mfcc import Mfcc
from .plp import Plp
from .spectrogram import Spectrogram
from .whisper_fbank import WhisperFbank
cmake_prefix_path = _Path(__file__).parent / "share" / "cmake"
del _Path

View File

@ -4,20 +4,9 @@
import _kaldifeat
from .offline_feature import OfflineFeature
from .online_feature import OnlineFeature
class Fbank(OfflineFeature):
def __init__(self, opts: _kaldifeat.FbankOptions):
super().__init__(opts)
self.computer = _kaldifeat.Fbank(opts)
class OnlineFbank(OnlineFeature):
def __init__(self, opts: _kaldifeat.FbankOptions):
super().__init__(opts)
self.computer = _kaldifeat.OnlineFbank(opts)
def __setstate__(self, state):
self.opts = _kaldifeat.FbankOptions.from_dict(state)
self.computer = _kaldifeat.OnlineFbank(self.opts)
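
Together with the pickle support on the options classes, the __setstate__ above lets an OnlineFbank be rebuilt from a plain options dict. A hedged sketch, assuming the OnlineFeature base class supplies the matching __getstate__:

import pickle

import kaldifeat

fbank = kaldifeat.OnlineFbank(kaldifeat.FbankOptions())

# Round trip: the assumed __getstate__ returns the options as a dict and
# the __setstate__ defined above recreates the underlying computer.
fbank2 = pickle.loads(pickle.dumps(fbank))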

Some files were not shown because too many files have changed in this diff.