2025-08-27 10:44:17 +00:00
151 changed files with 886 additions and 15796 deletions
--- a/.flake8
+++ b/.flake8
@ -3,10 +3,8 @@ max-line-length = 80
 exclude =
  .git,
  doc,
  build,
  build_release,
  cmake/cmake_extension.py,
  kaldifeat/python/kaldifeat/__init__.py
 ignore =
--- a/.github/workflows/build-doc.yml
+++ b/.github/workflows/build-doc.yml
@ -1,81 +0,0 @@
 # Copyright      2022  Xiaomi Corp.       (author: Fangjun Kuang)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # refer to https://github.com/actions/starter-workflows/pull/47/files
 # You can access it at https://csukuangfj.github.io/kaldifeat
 name: Generate doc
 on:
  push:
    branches:
    - master
    - doc
  workflow_dispatch:
 jobs:
  # Runs `make html` in doc/ and publishes doc/build/html to the gh-pages
  # branch.
  build-doc:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      # Clones the Hugging Face repository with GIT_LFS_SKIP_SMUDGE=1 (LFS
      # objects are not downloaded) and executes its run.sh from inside the
      # clone; KALDIFEAT_DIR is exported so the script can locate this checkout.
      # NOTE(review): run.sh presumably regenerates the wheel index pages that
      # the next step copies out of doc/source -- confirm against that script.
      - name: Update wheels
        shell: bash
        run: |
            export KALDIFEAT_DIR=$PWD
            ls -lh $KALDIFEAT_DIR
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
            cd huggingface
            ./run.sh
      # Installs the doc requirements, builds the HTML output and copies the
      # four wheel index pages next to it. The empty .nojekyll file disables
      # Jekyll processing on GitHub Pages.
      - name: Build doc
        shell: bash
        run: |
          cd doc
          git status
          python3 -m pip install -r ./requirements.txt
          make html
          cp source/cpu.html build/html/
          cp source/cuda.html build/html/
          cp source/cpu-cn.html build/html/
          cp source/cuda-cn.html build/html/
          touch build/html/.nojekyll
      # Pushes the generated site to the gh-pages branch using the workflow's
      # built-in token.
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./doc/build/html
          publish_branch: gh-pages
--- a/.github/workflows/macos-cpu-wheels.yml
+++ b/.github/workflows/macos-cpu-wheels.yml
@ -1,121 +0,0 @@
 name: build-wheels-cpu-macos
 on:
  push:
    branches:
      # - wheel
      - torch-2.8.0
    tags:
      - '*'
  workflow_dispatch:
 concurrency:
  group: build-wheels-cpu-macos-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    # Produces the build matrix as a job output; the wheel job expands it with
    # fromJson().
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos)
          # Run the generator once; a failure aborts the step because the
          # assignment propagates the exit status of the command substitution.
          MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # The `::set-output` workflow command is deprecated; step outputs are
          # written to the file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  build_wheels_macos_cpu:
    # Builds one CPU wheel per matrix entry on macOS, rewrites its platform tag
    # to macosx_11_0_arm64, uploads it as an artifact and, when running in the
    # csukuangfj organisation, pushes it to the Hugging Face repository.
    needs: generate_build_matrix
    name: ${{ matrix.torch }} ${{ matrix.python-version }}
    runs-on: macos-14
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        shell: bash
        run: |
          # The version specifier must be quoted: unquoted, bash parses
          # `wheel>=0.40.0` as the argument `wheel` plus a redirection of
          # stdout to a file named `=0.40.0`, so the minimum version needed
          # for the `wheel tags` command below is silently dropped.
          pip install -q torch==${{ matrix.torch}} cmake numpy "wheel>=0.40.0" twine setuptools
      - name: Build wheel
        shell: bash
        run: |
          python3 setup.py bdist_wheel
          mkdir wheelhouse
          cp -v dist/* wheelhouse
      - name: Display wheels (before fix)
        shell: bash
        run: |
          ls -lh ./wheelhouse/
      - name: Fix wheel platform tag
        run: |
          # See https://github.com/glencoesoftware/zeroc-ice-py-macos-x86_64/pull/3/files
          # See:
          #  * https://github.com/pypa/wheel/issues/406
          # --remove deletes the original file after the retagged copy is written.
          python -m wheel tags \
            --platform-tag=macosx_11_0_arm64 \
            --remove wheelhouse/*.whl
      - name: Display wheels (after fix)
        shell: bash
        run: |
          ls -lh ./wheelhouse/
      - name: Upload Wheel
        uses: actions/upload-artifact@v4
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-macos-latest-cpu
          path: wheelhouse/*.whl
      # https://huggingface.co/docs/hub/spaces-github-actions
      # The whole clone/commit/push sequence is retried (up to 20 attempts,
      # 200 s each); the clone is removed first so every attempt starts clean.
      - name: Publish to huggingface
        if: github.repository_owner == 'csukuangfj'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
            cd huggingface
            git pull
            d=cpu/1.25.5.dev20241029/macos
            mkdir -p $d
            cp -v ../wheelhouse/*.whl ./$d
            git status
            git lfs track "*.whl"
            git add .
            git commit -m "upload macos wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
--- a/.github/workflows/publish_to_pypi.yml
+++ b/.github/workflows/publish_to_pypi.yml
@ -20,37 +20,88 @@ on:
  push:
    tags:
      - '*'
  workflow_dispatch:
 jobs:
  pypi:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-18.04]
        cuda: ["10.1"]
        gcc: ["5"]
        torch: ["1.8.1"]
        python-version: [3.6, 3.7, 3.8]
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
-          python-version: 3.8
+          python-version: ${{ matrix.python-version }}
-      - name: Install Python dependencies
+      - name: Install CUDA Toolkit ${{ matrix.cuda }}
        shell: bash
        env:
          cuda: ${{ matrix.cuda }}
        run: |
          source ./scripts/github_actions/install_cuda.sh
          echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV
          echo "${CUDA_HOME}/bin" >> $GITHUB_PATH
          echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
      - name: Display NVCC version
        run: |
          which nvcc
          nvcc --version
      - name: Install GCC ${{ matrix.gcc }}
        run: |
          sudo apt-get install -y gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }}
          echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> $GITHUB_ENV
          echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV
          echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV
      - name: Install PyTorch ${{ matrix.torch }}
        env:
          cuda: ${{ matrix.cuda }}
          torch: ${{ matrix.torch }}
        shell: bash
        run: |
          python3 -m pip install --upgrade pip
-          python3 -m pip install wheel twine setuptools
+          python3 -m pip install wheel twine typing_extensions
-          python3 -m pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+          python3 -m pip install bs4 requests tqdm
-      - name: Build
+          ./scripts/github_actions/install_torch.sh
-        shell: bash
+          python3 -c "import torch; print('torch version:', torch.__version__)"
      - name: Download cudnn 8.0
        env:
          cuda: ${{ matrix.cuda }}
        run: |
-          python3 setup.py sdist
+          ./scripts/github_actions/install_cudnn.sh
-          ls -l dist/*
+
      - name: Build pip packages
        shell: bash
        env:
          KALDIFEAT_IS_FOR_PYPI: 1
        run: |
          tag=$(python3 -c "import sys; print(''.join(sys.version[:3].split('.')))")
          export KALDIFEAT_MAKE_ARGS="-j2"
          python3 setup.py bdist_wheel --python-tag=py${tag}
          ls -lh dist/
      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
-          twine upload dist/kaldifeat-*.tar.gz
+          twine upload dist/kaldifeat-*.whl
      - name: Upload Wheel
        uses: actions/upload-artifact@v2
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-${{ matrix.os }}
          path: dist/*.whl
--- a/.github/workflows/run-tests-macos-cpu.yml
+++ b/.github/workflows/run-tests-macos-cpu.yml
@ -1,85 +0,0 @@
 # Copyright      2021  Xiaomi Corp.       (author: Fangjun Kuang)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 name: Run tests macos cpu
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 jobs:
  generate_build_matrix:
    # Produces the test matrix as a job output; the test job expands it with
    # fromJson().
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Run the generator once; a failure aborts the step because the
          # assignment propagates the exit status of the command substitution.
          MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # The `::set-output` workflow command is deprecated; step outputs are
          # written to the file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  # Configures and builds the project with CMake on macOS for every matrix
  # entry, then runs the ctest suite.
  run_tests_macos_cpu:
    needs: generate_build_matrix
    runs-on: macos-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      # The torch install tries the torch_stable.html index first and falls
      # back to the whl/torch/ index when that fails.
      - name: Install PyTorch ${{ matrix.torch }}
        shell: bash
        run: |
          python3 -m pip install -qq --upgrade pip
          python3 -m pip install -qq wheel twine typing_extensions soundfile numpy
          python3 -m pip install -qq torch==${{ matrix.torch }} -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }} -f https://download.pytorch.org/whl/torch/
          python3 -c "import torch; print('torch version:', torch.__version__)"
      # Out-of-source build in build_release with C++17 requested at configure
      # time.
      - name: Build
        shell: bash
        run: |
          mkdir build_release
          cd build_release
          cmake -DCMAKE_CXX_STANDARD=17 ..
          make VERBOSE=1 -j3
      # --output-on-failure prints the output of failing tests only.
      - name: Run tests
        shell: bash
        run: |
          cd build_release
          ctest --output-on-failure
--- a/.github/workflows/run-tests-ubuntu-cpu.yml
+++ b/.github/workflows/run-tests-ubuntu-cpu.yml
@ -1,88 +0,0 @@
 # Copyright      2021  Xiaomi Corp.       (author: Fangjun Kuang)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 name: Run tests ubuntu cpu
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 jobs:
  generate_build_matrix:
    # Produces the test matrix as a job output; the test job expands it with
    # fromJson().
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Run the generator once; a failure aborts the step because the
          # assignment propagates the exit status of the command substitution.
          MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # The `::set-output` workflow command is deprecated; step outputs are
          # written to the file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  run_tests_ubuntu_cpu:
    # Builds the project against a CPU-only PyTorch on Ubuntu for every matrix
    # entry, then runs the ctest suite.
    needs: generate_build_matrix
    # ubuntu-18.04 hosted runners have been retired, so a job pinned to that
    # label is never picked up. Use the same image as the CUDA test workflow.
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install PyTorch ${{ matrix.torch }}
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine typing_extensions soundfile
          python3 -m pip install bs4 requests tqdm numpy
          # Try the torch_stable.html index first, then fall back to whl/torch/.
          python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/
          python3 -c "import torch; print('torch version:', torch.__version__)"
      - name: Build
        shell: bash
        run: |
          mkdir build_release
          cd build_release
          cmake -DCMAKE_CXX_STANDARD=17 ..
          make VERBOSE=1 -j3
      - name: Run tests
        shell: bash
        run: |
          cd build_release
          ctest --output-on-failure
--- a/.github/workflows/run-tests-ubuntu-cuda.yml
+++ b/.github/workflows/run-tests-ubuntu-cuda.yml
@ -1,112 +0,0 @@
 # Copyright      2021  Xiaomi Corp.       (author: Fangjun Kuang)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 name: Run tests ubuntu cuda
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 jobs:
  generate_build_matrix:
    # Produces the CUDA test matrix as a job output; the test job expands it
    # with fromJson().
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Run the generator once; a failure aborts the step because the
          # assignment propagates the exit status of the command substitution.
          MATRIX=$(python scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # The `::set-output` workflow command is deprecated; step outputs are
          # written to the file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  # Installs the CUDA toolkit, PyTorch and cuDNN through the helper scripts in
  # scripts/github_actions, builds the project with CMake and runs ctest for
  # every matrix entry.
  run_tests_ubuntu_cuda:
    needs: generate_build_matrix
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      # install_cuda.sh is sourced (not executed) so the CUDA_HOME it sets is
      # visible here; the value is then exported to later steps through
      # GITHUB_ENV / GITHUB_PATH.
      - name: Install CUDA Toolkit ${{ matrix.cuda }}
        shell: bash
        env:
          cuda: ${{ matrix.cuda }}
        run: |
          source ./scripts/github_actions/install_cuda.sh
          echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV
          echo "${CUDA_HOME}/bin" >> $GITHUB_PATH
          echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
      - name: Display NVCC version
        run: |
          which nvcc
          nvcc --version
      # The matrix values are handed to install_torch.sh through the `cuda`
      # and `torch` environment variables.
      - name: Install PyTorch ${{ matrix.torch }}
        env:
          cuda: ${{ matrix.cuda }}
          torch: ${{ matrix.torch }}
        shell: bash
        run: |
          sudo apt-get update
          sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg
          python3 -m pip install --upgrade pip
          python3 -m pip install wheel twine typing_extensions soundfile
          python3 -m pip install bs4 requests tqdm numpy
          ./scripts/github_actions/install_torch.sh
          python3 -c "import torch; print('torch version:', torch.__version__)"
      - name: Download cudnn 8.0
        env:
          cuda: ${{ matrix.cuda }}
        run: |
          ./scripts/github_actions/install_cudnn.sh
      # Out-of-source build in build_release with C++17 requested at configure
      # time.
      - name: Build
        shell: bash
        run: |
          mkdir build_release
          cd build_release
          cmake -DCMAKE_CXX_STANDARD=17 ..
          make VERBOSE=1 -j3
      - name: Run tests
        shell: bash
        run: |
          cd build_release
          ctest --output-on-failure
--- a/.github/workflows/run-tests-windows-cpu.yml
+++ b/.github/workflows/run-tests-windows-cpu.yml
@ -1,121 +0,0 @@
 # Copyright      2021  Xiaomi Corp.       (author: Fangjun Kuang)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 name: Run tests windows cpu
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 jobs:
  generate_build_matrix:
    # Produces the test matrix as a job output; the test job expands it with
    # fromJson().
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Run the generator once; a failure aborts the step because the
          # assignment propagates the exit status of the command substitution.
          MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # The `::set-output` workflow command is deprecated; step outputs are
          # written to the file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  run_tests_windows_cpu:
    # Builds the extension, a wheel and the C++ tests against a CPU-only
    # PyTorch on Windows for every matrix entry, then runs ctest.
    # see https://github.com/actions/virtual-environments/blob/win19/20210525.0/images/win/Windows2019-Readme.md
    needs: generate_build_matrix
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # see https://github.com/microsoft/setup-msbuild
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.0.2
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      - name: Install PyTorch ${{ matrix.torch }}
        run: |
          pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/
          pip3 install -qq wheel twine dataclasses numpy typing_extensions soundfile
      - name: Display CMake version
        run: |
          cmake --version
          cmake --help
      - name: Configure CMake
        shell: bash
        run: |
          mkdir build_release
          cd build_release
          # BUILD_TYPE is not defined anywhere in this workflow, so fall back
          # to Release (matching the --config used by the build steps).
          # CMAKE_CXX_STANDARD is a cache variable and only takes effect when
          # passed at configure time, so it is set here rather than on the
          # `cmake --build` command lines.
          cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} -DCMAKE_CXX_STANDARD=17 ..
          ls -lh
      - name: Build kaldifeat
        run: |
          cd build_release
          # In build mode the directory must directly follow --build; a -D
          # option in that position is taken for the build directory.
          cmake --build . --target _kaldifeat --config Release
      - name: Display generated files
        shell: bash
        run: |
          cd build_release
          ls -lh lib/*/*
      - name: Build wheel
        shell: bash
        run: |
          python3 setup.py bdist_wheel
          ls -lh dist/
          pip install ./dist/*.whl
          python3 -c "import kaldifeat; print(kaldifeat.__version__)"
      # NOTE(review): the artifact name uses matrix.os; confirm that the
      # generated matrix actually provides an `os` key.
      - name: Upload Wheel
        uses: actions/upload-artifact@v4
        with:
          name: python-${{ matrix.python-version }}-${{ matrix.os }}-cpu
          path: dist/*.whl
      - name: Build tests
        shell: bash
        run: |
          cd build_release
          # --build has to be the first argument for cmake to enter build
          # mode; with a -D option in front of it cmake runs a configure
          # instead and ALL_BUILD is never built.
          cmake --build . --target ALL_BUILD --config Release
          ls -lh bin/*/*
          ctest -C Release --verbose --output-on-failure
--- a/.github/workflows/run-tests-windows-cuda.yml
+++ b/.github/workflows/run-tests-windows-cuda.yml
@ -1,173 +0,0 @@
 # Copyright      2021  Xiaomi Corp.       (author: Fangjun Kuang)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: Run tests windows cuda
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 jobs:
  generate_build_matrix:
    # Produces the Windows CUDA test matrix as a job output; the test job
    # expands it with fromJson().
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Run the generator once; a failure aborts the step because the
          # assignment propagates the exit status of the command substitution.
          MATRIX=$(python scripts/github_actions/generate_build_matrix.py --enable-cuda --for-windows --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # The `::set-output` workflow command is deprecated; step outputs are
          # written to the file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  run_tests_windows_cuda:
    # Installs the CUDA toolkit and cuDNN on Windows, builds the extension, a
    # wheel and the C++ tests against a CUDA build of PyTorch for every matrix
    # entry, then runs ctest.
    needs: generate_build_matrix
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # see https://github.com/microsoft/setup-msbuild
      - name: Add msbuild to PATH
        uses: microsoft/setup-msbuild@v1.0.2
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      # See https://github.com/Jimver/cuda-toolkit/blob/master/src/links/windows-links.ts
      # for available CUDA versions
      - uses: Jimver/cuda-toolkit@v0.2.7
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda }}
      - name: Display CUDA version
        shell: bash
        run: |
          echo "Installed cuda version is: ${{ steps.cuda-toolkit.outputs.cuda }}"
          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
          nvcc --version
      - name: Remove CUDA installation package
        shell: bash
        run: |
          rm "C:/hostedtoolcache/windows/cuda_installer-windows/${{ matrix.cuda }}/x64/cuda_installer_${{ matrix.cuda }}.exe"
      - name: Download cuDNN
        shell: bash
        run: |
          # Clone without LFS objects, then pull only the one archive needed.
          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/cudnn-for-windows
          cd cudnn-for-windows
          git lfs pull --include="cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip"
          unzip cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip
          rm cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive.zip
          ls -lh *
          ls -lh */*
          echo "PWD: $PWD"
      - name: Install PyTorch ${{ matrix.torch }}
        shell: bash
        run: |
          # Build the local-version suffix from the CUDA version by taking
          # characters 0-1 and 3 (e.g. "11.6" -> "+cu116"); 10.2 gets no suffix.
          version=${{ matrix.cuda }}
          major=${version:0:2}
          minor=${version:3:1}
          v=${major}${minor}
          if [ ${v} -eq 102 ]; then v=""; else v="+cu${v}"; fi
          python3 -m pip install -qq --upgrade pip
          python3 -m pip install -qq wheel twine numpy typing_extensions
          python3 -m pip install -qq dataclasses soundfile numpy
          python3 -m pip install -qq torch==${{ matrix.torch }}${v} -f https://download.pytorch.org/whl/torch_stable.html numpy || python3 -m pip install -qq torch==${{ matrix.torch }}${v} -f https://download.pytorch.org/whl/torch/ numpy
          python3 -c "import torch; print('torch version:', torch.__version__)"
          python3 -m torch.utils.collect_env
      - name: Display CMake version
        run: |
          cmake --version
          cmake --help
      - name: Configure CMake
        shell: bash
        run: |
          echo "PWD: $PWD"
          ls -lh
          mkdir build_release
          cd build_release
          # BUILD_TYPE is not defined anywhere in this workflow, so fall back
          # to Release (matching the --config used by the build steps).
          # CMAKE_CXX_STANDARD is a cache variable and only takes effect when
          # passed at configure time, so it is set here rather than on the
          # `cmake --build` command line.
          cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} -DCMAKE_CXX_STANDARD=17 -DCUDNN_INCLUDE_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/include -DCUDNN_LIBRARY_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/lib/cudnn.lib ..
          ls -lh
      - name: Build kaldifeat
        shell: bash
        run: |
          cd build_release
          cmake --build . --target _kaldifeat --config Release
      - name: Display generated files
        shell: bash
        run: |
          cd build_release
          ls -lh lib/*/*
      - name: Build wheel
        shell: bash
        run: |
          echo $PWD
          ls -lh ./*
          # Same Release fallback as the configure step for the undefined BUILD_TYPE.
          export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${BUILD_TYPE:-Release} -DCUDNN_INCLUDE_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/include -DCUDNN_LIBRARY_PATH=d:/a/kaldifeat/kaldifeat/cudnn-for-windows/cudnn-windows-x86_64-8.4.1.50_cuda11.6-archive/lib/cudnn.lib"
          python3 setup.py bdist_wheel
          ls -lh dist/
          pip install ./dist/*.whl
          python3 -c "import kaldifeat; print(kaldifeat.__version__)"
      # NOTE(review): the artifact name uses matrix.os; confirm that the
      # generated matrix actually provides an `os` key.
      - name: Upload Wheel
        uses: actions/upload-artifact@v4
        with:
          name: python-${{ matrix.python-version }}-${{ matrix.os }}-cuda-${{ matrix.cuda }}
          path: dist/*.whl
      - name: Build tests
        shell: bash
        run: |
          cd build_release
          # --build has to be the first argument for cmake to enter build
          # mode; with a -D option in front of it cmake runs a configure
          # instead and ALL_BUILD is never built.
          cmake --build . --target ALL_BUILD --config Release
          ls -lh bin/*/*
          ctest -C Release --verbose --output-on-failure
--- a/.github/workflows/style_check.yml
+++ b/.github/workflows/style_check.yml
@ -1,64 +0,0 @@
 # Copyright      2021  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: style_check
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 jobs:
  # Runs flake8 and `black --check` over the whole repository on Ubuntu and
  # macOS with pinned tool versions.
  style_check:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
        python-version: ["3.8"]
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}
      # black, flake8 and click are pinned to exact versions; the click pin is
      # explained by the issue linked inside the script.
      - name: Install Python dependencies
        run: |
          python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2 click==8.0.4
          # See https://github.com/psf/black/issues/2964
          # The version of click should be selected from 8.0.0, 8.0.1, 8.0.2, 8.0.3, and 8.0.4
      # NOTE(review): the shell comment inside this script talks about syntax
      # errors / undefined names only, but the first command has no --select
      # filter and therefore reports every enabled check; the second `flake8 .`
      # repeats the same checks without the extra reporting flags.
      - name: Run flake8
        shell: bash
        working-directory: ${{github.workspace}}
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --show-source --statistics
          flake8 .
      # --check exits non-zero when a file would be reformatted; --diff prints
      # the proposed changes instead of writing them.
      - name: Run black
        shell: bash
        working-directory: ${{github.workspace}}
        run: |
          black --check --diff .
--- a/.github/workflows/test-wheels.yml
+++ b/.github/workflows/test-wheels.yml
@ -1,67 +0,0 @@
 name: Test pre-compiled wheels
 on:
  workflow_dispatch:
    inputs:
      torch_version:
        description: "torch version, e.g., 2.0.1"
        required: true
      kaldifeat_version:
        description: "kaldifeat version, e.g., 1.25.0.dev20230726"
        required: true
 jobs:
  Test_pre_compiled_wheels:
    name: ${{ matrix.os }} ${{ github.event.inputs.torch_version }} ${{ github.event.inputs.kaldifeat_version }} ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: ["3.8", "3.9", "3.10"]
    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Install the interpreter version selected by the job matrix.
      # Uses the maintained v5 major of setup-python rather than the
      # outdated v2.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      - name: Install dependencies
        shell: bash
        run: |
          pip install numpy
      # macOS runners: install the requested torch release as-is.
      - name: Install torch
        if: startsWith(matrix.os, 'macos')
        shell: bash
        env:
          # Pass the workflow_dispatch input to the shell as data instead of
          # splicing it into the script text.
          TORCH_VERSION: ${{ github.event.inputs.torch_version }}
        run: |
          pip install "torch==${TORCH_VERSION}"
      # Ubuntu and Windows runners: install the "+cpu" build of torch.
      - name: Install torch
        if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'windows')
        shell: bash
        env:
          # Pass the workflow_dispatch input to the shell as data instead of
          # splicing it into the script text.
          TORCH_VERSION: ${{ github.event.inputs.torch_version }}
        run: |
          # The second index URL is only tried when the first install fails.
          pip install "torch==${TORCH_VERSION}+cpu" -f https://download.pytorch.org/whl/torch_stable.html \
            || pip install "torch==${TORCH_VERSION}+cpu" -f https://download.pytorch.org/whl/torch/
      # Install the pre-compiled CPU wheel matching the requested kaldifeat
      # and torch versions from the project's wheel index page.
      - name: Install kaldifeat
        shell: bash
        env:
          # Pass the workflow_dispatch inputs to the shell as data instead of
          # splicing them into the script text.
          TORCH_VERSION: ${{ github.event.inputs.torch_version }}
          KALDIFEAT_VERSION: ${{ github.event.inputs.kaldifeat_version }}
        run: |
          pip install "kaldifeat==${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}" -f https://csukuangfj.github.io/kaldifeat/cpu.html
      - name: Run tests
        shell: bash
        run: |
          cd kaldifeat/python/tests
          python3 -c "import kaldifeat; print(kaldifeat.__file__)"
          python3 -c "import kaldifeat; print(kaldifeat.__version__)"
          python3 ./test_fbank_options.py
          python3 ./test_mfcc_options.py
--- a/.github/workflows/ubuntu-arm64-cpu-wheels.yml
+++ b/.github/workflows/ubuntu-arm64-cpu-wheels.yml
@ -1,168 +0,0 @@
 name: build-wheels-cpu-arm64-ubuntu
 on:
  push:
    branches:
      # - wheel
      - torch-2.8.0
    tags:
      - '*'
  workflow_dispatch:
 concurrency:
  group: build-wheels-cpu-arm64-ubuntu-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Compute the build matrix and expose it as the step output "matrix",
      # which the job re-exports for the build job's fromJson() call.
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Variant without --test-only-latest-torch, kept for reference:
          # MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-arm64)
          MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch --for-arm64)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # "::set-output" is deprecated; step outputs are written to the
          # file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  build-manylinux-wheels:
    needs: generate_build_matrix
    name: ${{ matrix.torch }} ${{ matrix.python-version }}
    runs-on: ubuntu-22.04-arm
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
      # https://github.com/pytorch/builder/tree/main/manywheel
      # https://github.com/pytorch/builder/pull/476
      # https://github.com/k2-fsa/k2/issues/733
      # https://github.com/pytorch/pytorch/pull/50633  (generate build matrix)
      - name: Run the build process with Docker
        uses: addnab/docker-run-action@v3
        with:
            image: ${{ matrix.image }}
            options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }}
            run: |
              echo "pwd: $PWD"
              uname -a
              id
              cat /etc/*release
              gcc --version
              python3 --version
              which python3
              ls -lh /opt/python/
              echo "---"
              ls -lh /opt/python/cp*
              ls -lh /opt/python/*/bin
              echo "---"
              find /opt/python/cp* -name "libpython*"
              echo "-----"
              find /opt/_internal/cp* -name "libpython*"
              echo "-----"
              find / -name "libpython*"
              echo "----"
              ls -lh /usr/lib64/libpython3.so
              # cp36-cp36m
              # cp37-cp37m
              # cp38-cp38
              # cp39-cp39
              # cp310-cp310
              # cp311-cp311
              # cp312-cp312
              # cp313-cp313
              # cp313-cp313t  (no gil)
              if [[ $PYTHON_VERSION == "3.6" ]]; then
                python_dir=/opt/python/cp36-cp36m
                export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.7" ]]; then
                python_dir=/opt/python/cp37-cp37m
                export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.8" ]]; then
                python_dir=/opt/python/cp38-cp38
                export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.9" ]]; then
                python_dir=/opt/python/cp39-cp39
                export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.10" ]]; then
                python_dir=/opt/python/cp310-cp310
                export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.11" ]]; then
                python_dir=/opt/python/cp311-cp311
                export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.12" ]]; then
                python_dir=/opt/python/cp312-cp312
                export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.13" ]]; then
                python_dir=/opt/python/cp313-cp313
                export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
              else
                echo "Unsupported Python version $PYTHON_VERSION"
                exit 1
              fi
              export PYTHON_INSTALL_DIR=$python_dir
              export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
              python3 --version
              which python3
              /var/www/scripts/github_actions/build-ubuntu-cpu-arm64.sh
      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: github.repository_owner == 'csukuangfj'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
            cd huggingface
            git pull
            d=cpu/1.25.5.dev20250307/linux-arm64
            mkdir -p $d
            cp -v ../wheelhouse/*.whl ./$d
            git status
            git lfs track "*.whl"
            git add .
            git commit -m "upload ubuntu-arm64-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
--- a/.github/workflows/ubuntu-cpu-wheels.yml
+++ b/.github/workflows/ubuntu-cpu-wheels.yml
@ -1,168 +0,0 @@
 name: build-wheels-cpu-ubuntu
 on:
  push:
    branches:
      # - wheel
      - torch-2.8.0
    tags:
      - '*'
  workflow_dispatch:
 concurrency:
  group: build-wheels-cpu-ubuntu-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Compute the build matrix and expose it as the step output "matrix",
      # which the job re-exports for the build job's fromJson() call.
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Variant without --test-only-latest-torch, kept for reference:
          # MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py)
          MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # "::set-output" is deprecated; step outputs are written to the
          # file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  build-manylinux-wheels:
    needs: generate_build_matrix
    name: ${{ matrix.torch }} ${{ matrix.python-version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
      # https://github.com/pytorch/builder/tree/main/manywheel
      # https://github.com/pytorch/builder/pull/476
      # https://github.com/k2-fsa/k2/issues/733
      # https://github.com/pytorch/pytorch/pull/50633  (generate build matrix)
      - name: Run the build process with Docker
        uses: addnab/docker-run-action@v3
        with:
            image: ${{ matrix.image }}
            options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }}
            run: |
              echo "pwd: $PWD"
              uname -a
              id
              cat /etc/*release
              gcc --version
              python3 --version
              which python3
              ls -lh /opt/python/
              echo "---"
              ls -lh /opt/python/cp*
              ls -lh /opt/python/*/bin
              echo "---"
              find /opt/python/cp* -name "libpython*"
              echo "-----"
              find /opt/_internal/cp* -name "libpython*"
              echo "-----"
              find / -name "libpython*"
              echo "----"
              ls -lh /usr/lib64/libpython3.so || true
              # cp36-cp36m
              # cp37-cp37m
              # cp38-cp38
              # cp39-cp39
              # cp310-cp310
              # cp311-cp311
              # cp312-cp312
              # cp313-cp313
              # cp313-cp313t  (no gil)
              if [[ $PYTHON_VERSION == "3.6" ]]; then
                python_dir=/opt/python/cp36-cp36m
                export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.7" ]]; then
                python_dir=/opt/python/cp37-cp37m
                export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.8" ]]; then
                python_dir=/opt/python/cp38-cp38
                export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.9" ]]; then
                python_dir=/opt/python/cp39-cp39
                export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.10" ]]; then
                python_dir=/opt/python/cp310-cp310
                export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.11" ]]; then
                python_dir=/opt/python/cp311-cp311
                export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.12" ]]; then
                python_dir=/opt/python/cp312-cp312
                export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.13" ]]; then
                python_dir=/opt/python/cp313-cp313
                export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
              else
                echo "Unsupported Python version $PYTHON_VERSION"
                exit 1
              fi
              export PYTHON_INSTALL_DIR=$python_dir
              export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
              python3 --version
              which python3
              /var/www/scripts/github_actions/build-ubuntu-cpu.sh
      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: github.repository_owner == 'csukuangfj'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
            cd huggingface
            git pull
            d=cpu/1.25.5.dev20250307/linux-x64
            mkdir -p $d
            cp -v ../wheelhouse/*.whl ./$d
            git status
            git lfs track "*.whl"
            git add .
            git commit -m "upload ubuntu-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
--- a/.github/workflows/ubuntu-cuda-wheels.yml
+++ b/.github/workflows/ubuntu-cuda-wheels.yml
@ -1,194 +0,0 @@
 name: build-wheels-cuda-ubuntu
 on:
  push:
    branches:
      - wheel
      # - torch-2.7.1
    tags:
      - '*'
  workflow_dispatch:
 concurrency:
  group: build-wheels-cuda-ubuntu-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Compute the build matrix and expose it as the step output "matrix",
      # which the job re-exports for the build job's fromJson() call.
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Variant without --test-only-latest-torch, kept for reference:
          # MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --enable-cuda)
          MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # "::set-output" is deprecated; step outputs are written to the
          # file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  build-manylinux-wheels:
    needs: generate_build_matrix
    name: ${{ matrix.torch }} ${{ matrix.python-version }} cuda${{ matrix.cuda }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      # see https://github.com/pytorch/test-infra/blob/9e3d392690719fac85bad0c9b67f530e48375ca1/tools/scripts/generate_binary_build_matrix.py
      # https://github.com/pytorch/builder/tree/main/manywheel
      # https://github.com/pytorch/builder/pull/476
      # https://github.com/k2-fsa/k2/issues/733
      # https://github.com/pytorch/pytorch/pull/50633  (generate build matrix)
      - name: Run the build process with Docker
        uses: addnab/docker-run-action@v3
        with:
            image: ${{ matrix.image }}
            options: -v ${{ github.workspace }}:/var/www -e IS_2_28=${{ matrix.is_2_28 }} -e PYTHON_VERSION=${{ matrix.python-version }} -e TORCH_VERSION=${{ matrix.torch }} -e CUDA_VERSION=${{ matrix.cuda }}
            run: |
              echo "pwd: $PWD"
              uname -a
              id
              cat /etc/*release
              gcc --version
              python3 --version
              which python3
              ls -lh /opt/python/
              echo "---"
              ls -lh /opt/python/cp*
              ls -lh /opt/python/*/bin
              echo "---"
              find /opt/python/cp* -name "libpython*"
              echo "-----"
              find /opt/_internal/cp* -name "libpython*"
              echo "-----"
              find / -name "libpython*"
              # cp36-cp36m
              # cp37-cp37m
              # cp38-cp38
              # cp39-cp39
              # cp310-cp310
              # cp311-cp311
              # cp312-cp312
              # cp313-cp313
              # cp313-cp313t  (no gil)
              if [[ $PYTHON_VERSION == "3.6" ]]; then
                python_dir=/opt/python/cp36-cp36m
                export PYTHONPATH=/opt/python/cp36-cp36m/lib/python3.6/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.7" ]]; then
                python_dir=/opt/python/cp37-cp37m
                export PYTHONPATH=/opt/python/cp37-cp37m/lib/python3.7/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.8" ]]; then
                python_dir=/opt/python/cp38-cp38
                export PYTHONPATH=/opt/python/cp38-cp38/lib/python3.8/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.9" ]]; then
                python_dir=/opt/python/cp39-cp39
                export PYTHONPATH=/opt/python/cp39-cp39/lib/python3.9/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.10" ]]; then
                python_dir=/opt/python/cp310-cp310
                export PYTHONPATH=/opt/python/cp310-cp310/lib/python3.10/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.11" ]]; then
                python_dir=/opt/python/cp311-cp311
                export PYTHONPATH=/opt/python/cp311-cp311/lib/python3.11/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.12" ]]; then
                python_dir=/opt/python/cp312-cp312
                export PYTHONPATH=/opt/python/cp312-cp312/lib/python3.12/site-packages:$PYTHONPATH
              elif [[ $PYTHON_VERSION == "3.13" ]]; then
                python_dir=/opt/python/cp313-cp313
                export PYTHONPATH=/opt/python/cp313-cp313/lib/python3.13/site-packages:$PYTHONPATH
              else
                echo "Unsupported Python version $PYTHON_VERSION"
                exit 1
              fi
              export PYTHON_INSTALL_DIR=$python_dir
              export PATH=$PYTHON_INSTALL_DIR/bin:$PATH
              # There are no libpython.so inside $PYTHON_INSTALL_DIR
              # since they are statically linked.
              python3 --version
              which python3
              pushd /usr/local
              rm cuda
              ln -s cuda-$CUDA_VERSION cuda
              popd
              which nvcc
              nvcc --version
              cp /var/www/scripts/github_actions/install_torch.sh .
              chmod +x install_torch.sh
              /var/www/scripts/github_actions/build-ubuntu-cuda.sh
      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
      - name: Upload Wheel
        if: false
        uses: actions/upload-artifact@v4
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cuda-is_2_28-${{ matrix.is_2_28 }}
          path: wheelhouse/*.whl
      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: github.repository_owner == 'csukuangfj'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
            cd huggingface
            git pull
            d=cuda/1.25.5.dev20241029/linux
            mkdir -p $d
            cp -v ../wheelhouse/*.whl ./$d
            git status
            git lfs track "*.whl"
            git add .
            git commit -m "upload ubuntu-cuda wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
--- a/.github/workflows/windows-x64-cpu-wheels.yml
+++ b/.github/workflows/windows-x64-cpu-wheels.yml
@ -1,108 +0,0 @@
 name: build-wheels-cpu-win64
 on:
  push:
    branches:
      # - wheel
      - torch-2.8.0
    tags:
      - '*'
  workflow_dispatch:
 concurrency:
  group: build-wheels-cpu-win64-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Compute the build matrix and expose it as the step output "matrix",
      # which the job re-exports for the build job's fromJson() call.
      - name: Generating build matrix
        id: set-matrix
        run: |
          # Variant without --test-only-latest-torch, kept for reference:
          # MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows)
          MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows --test-only-latest-torch)
          # outputting for debugging purposes
          echo "${MATRIX}"
          # "::set-output" is deprecated; step outputs are written to the
          # file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  build_wheels_win64_cpu:
    needs: generate_build_matrix
    name: ${{ matrix.torch }} ${{ matrix.python-version }}
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # Install the build toolchain, then the "+cpu" build of torch for the
      # matrix's torch version.
      - name: Install dependencies
        shell: bash
        run: |
          # The version specifier must be quoted: an unquoted ">" is parsed by
          # bash as an output redirection, which would drop the constraint and
          # write pip's output to a file named "=0.40.0".
          pip install -q torch==${{ matrix.torch}} cmake numpy "wheel>=0.40.0" twine setuptools
          # The second index URL is only tried when the first install fails.
          pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch_stable.html cmake numpy || pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch/ cmake numpy
      - name: Build wheel
        shell: bash
        run: |
          python3 setup.py bdist_wheel
          mkdir wheelhouse
          cp -v dist/* wheelhouse
      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/
      - name: Upload Wheel
        uses: actions/upload-artifact@v4
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-windows-latest-cpu
          path: wheelhouse/*.whl
      # https://huggingface.co/docs/hub/spaces-github-actions
      - name: Publish to huggingface
        if: github.repository_owner == 'csukuangfj'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            git clone https://huggingface.co/csukuangfj/kaldifeat huggingface
            cd huggingface
            git pull
            d=cpu/1.25.5.dev20241029/windows
            mkdir -p $d
            cp -v ../wheelhouse/*.whl ./$d
            git status
            git lfs track "*.whl"
            git add .
            git commit -m "upload windows-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main
--- a/.gitignore
+++ b/.gitignore
@ -5,6 +5,3 @@ dist/
 __pycache__/
 test-1hour.wav
 path.sh
 torch_version.py
 cpu*.html
 cuda*.html
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,16 +1,10 @@
 # Copyright (c)  2021  Xiaomi Corporation (author: Fangjun Kuang)
 if (CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
  set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
 endif()
 cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
 project(kaldifeat)
-# remember to change the version in
+set(kaldifeat_VERSION "1.2")
 # scripts/conda/kaldifeat/meta.yaml
 # scripts/conda-cpu/kaldifeat/meta.yaml
 set(kaldifeat_VERSION "1.25.5")
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
@ -19,102 +13,32 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
 set(CMAKE_SKIP_BUILD_RPATH FALSE)
 set(BUILD_RPATH_USE_ORIGIN TRUE)
 set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
-
+set(CMAKE_INSTALL_RPATH "$ORIGIN")
-
+set(CMAKE_BUILD_RPATH "$ORIGIN")
 if(NOT APPLE)
  set(kaldifeat_rpath_origin "$ORIGIN")
 else()
  set(kaldifeat_rpath_origin "@loader_path")
 endif()
 set(CMAKE_INSTALL_RPATH ${kaldifeat_rpath_origin})
 set(CMAKE_BUILD_RPATH ${kaldifeat_rpath_origin})
 if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
  set(CMAKE_BUILD_TYPE Release)
 endif()
-if (NOT CMAKE_CXX_STANDARD)
+set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
  set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.")
 endif()
 message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
 set(CMAKE_CXX_EXTENSIONS OFF)
-list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
+message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
-option(BUILD_SHARED_LIBS "Whether to build shared libraries" ON)
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
-option(kaldifeat_BUILD_TESTS "Whether to build tests or not" OFF)
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
 option(kaldifeat_BUILD_PYMODULE "Whether to build python module or not" ON)
 message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}")
-if(BUILD_SHARED_LIBS AND MSVC)
+option(BUILD_TESTS "Whether to build tests or not" ON)
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 endif()
-if(kaldifeat_BUILD_PYMODULE)
+include(pybind11)
  include(pybind11)
 endif()
 # to prevent cmake from trying to link with system installed mkl since we not directly use it
 # mkl libraries should be linked with pytorch already
 # ref: https://github.com/pytorch/pytorch/blob/master/cmake/public/mkl.cmake
 set(CMAKE_DISABLE_FIND_PACKAGE_MKL TRUE)
 include(torch)
-if(kaldifeat_BUILD_TESTS)
+if(BUILD_TESTS)
  include(googletest)
  enable_testing()
 endif()
-
+include_directories(${CMAKE_SOURCE_DIR})
 if(WIN32)
  # disable various warnings for MSVC
  # 4624: destructor was implicitly defined as deleted because a base class destructor is inaccessible or deleted
  set(disabled_warnings
      /wd4624
  )
  message(STATUS "Disabled warnings: ${disabled_warnings}")
  foreach(w IN LISTS disabled_warnings)
    string(APPEND CMAKE_CXX_FLAGS " ${w} ")
  endforeach()
 endif()
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
 message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
 add_subdirectory(kaldifeat)
 # TORCH_VERSION is defined in cmake/torch.cmake
 configure_file(
  ${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py.in
  ${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py @ONLY
 )
 configure_file(
  ${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfigVersion.cmake.in
  ${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake
  @ONLY
 )
 configure_file(
  ${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfig.cmake.in
  ${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake
  @ONLY
 )
 install(FILES
  ${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake
  ${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake
  DESTINATION share/cmake/kaldifeat
 )
 install(FILES
  ${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py
  DESTINATION ./
 )
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,6 +0,0 @@
 include LICENSE
 include README.md
 include CMakeLists.txt
 exclude pyproject.toml
 recursive-include kaldifeat *.*
 recursive-include cmake *.*
--- a/README.md
+++ b/README.md
@ -1,178 +1,8 @@
 # kaldifeat
 <div align="center">
 <img src="/doc/source/images/os-green.svg">
 <img src="/doc/source/images/python_ge_3.6-blue.svg">
 <img src="/doc/source/images/pytorch_ge_1.5.0-green.svg">
 <img src="/doc/source/images/cuda_ge_10.1-orange.svg">
 </div>
 [![Documentation Status](https://github.com/csukuangfj/kaldifeat/actions/workflows/build-doc.yml/badge.svg)](https://csukuangfj.github.io/kaldifeat/)
 **Documentation**: <https://csukuangfj.github.io/kaldifeat>
 **Note**: If you are looking for a version that does not depend on PyTorch,
 please see <https://github.com/csukuangfj/kaldi-native-fbank>
 # Installation
 Refer to
 <https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html>
 for installation.
 > Never use `pip install kaldifeat`
 > Never use `pip install kaldifeat`
 > Never use `pip install kaldifeat`
 <sub>
 <table>
 <tr>
 <th>Comments</th>
 <th>Options</th>
 <th>Feature Computer</th>
 <th>Usage</th>
 </tr>
 <tr>
 <td>Fbank for <a href="https://github.com/openai/whisper">Whisper</a></td>
 <td><code>kaldifeat.WhisperFbankOptions</code></td>
 <td><code>kaldifeat.WhisperFbank</code></td>
 <td>
 <pre lang="python">
 opts = kaldifeat.WhisperFbankOptions()
 opts.device = torch.device('cuda', 0)
 fbank = kaldifeat.WhisperFbank(opts)
 features = fbank(wave)
 </pre>
 See <a href="https://github.com/csukuangfj/kaldifeat/pull/82">#82</a>
 </td>
 </tr>
 <tr>
 <td>Fbank for <a href="https://github.com/openai/whisper">Whisper-V3</a></td>
 <td><code>kaldifeat.WhisperFbankOptions</code></td>
 <td><code>kaldifeat.WhisperFbank</code></td>
 <td>
 <pre lang="python">
 opts = kaldifeat.WhisperFbankOptions()
 opts.num_mels = 128
 opts.device = torch.device('cuda', 0)
 fbank = kaldifeat.WhisperFbank(opts)
 features = fbank(wave)
 </pre>
 </td>
 </tr>
 <tr>
 <td>FBANK</td>
 <td><code>kaldifeat.FbankOptions</code></td>
 <td><code>kaldifeat.Fbank</code></td>
 <td>
 <pre lang="python">
 opts = kaldifeat.FbankOptions()
 opts.device = torch.device('cuda', 0)
 opts.frame_opts.window_type = 'povey'
 fbank = kaldifeat.Fbank(opts)
 features = fbank(wave)
 </pre>
 </td>
 </tr>
 <tr>
 <td>Streaming FBANK</td>
 <td><code>kaldifeat.FbankOptions</code></td>
 <td><code>kaldifeat.OnlineFbank</code></td>
 <td>
 See <a href="./kaldifeat/python/tests/test_fbank.py">
 ./kaldifeat/python/tests/test_fbank.py
 </a>
 </td>
 </tr>
 <tr>
 <td>MFCC</td>
 <td><code>kaldifeat.MfccOptions</code></td>
 <td><code>kaldifeat.Mfcc</code></td>
 <td>
 <pre lang="python">
 opts = kaldifeat.MfccOptions();
 opts.num_ceps = 13
 mfcc = kaldifeat.Mfcc(opts)
 features = mfcc(wave)
 </pre>
 </td>
 </tr>
 <tr>
 <td>Streaming MFCC</td>
 <td><code>kaldifeat.MfccOptions</code></td>
 <td><code>kaldifeat.OnlineMfcc</code></td>
 <td>
 See <a href="./kaldifeat/python/tests/test_mfcc.py">
 ./kaldifeat/python/tests/test_mfcc.py
 </a>
 </td>
 </tr>
 <tr>
 <td>PLP</td>
 <td><code>kaldifeat.PlpOptions</code></td>
 <td><code>kaldifeat.Plp</code></td>
 <td>
 <pre lang="python">
 opts = kaldifeat.PlpOptions();
 opts.mel_opts.num_bins = 23
 plp = kaldifeat.Plp(opts)
 features = plp(wave)
 </pre>
 </td>
 </tr>
 <tr>
 <td>Streaming PLP</td>
 <td><code>kaldifeat.PlpOptions</code></td>
 <td><code>kaldifeat.OnlinePlp</code></td>
 <td>
 See <a href="./kaldifeat/python/tests/test_plp.py">
 ./kaldifeat/python/tests/test_plp.py
 </a>
 </td>
 </tr>
 <tr>
 <td>Spectrogram</td>
 <td><code>kaldifeat.SpectrogramOptions</code></td>
 <td><code>kaldifeat.Spectrogram</code></td>
 <td>
 <pre lang="python">
 opts = kaldifeat.SpectrogramOptions();
 print(opts)
 spectrogram = kaldifeat.Spectrogram(opts)
 features = spectrogram(wave)
 </pre>
 </td>
 </tr>
 </table>
 </sub>
 Feature extraction compatible with `Kaldi` using PyTorch, supporting
 CUDA, batch processing, chunk processing, and autograd.
 The following kaldi-compatible commandline tools are implemented:
  - `compute-fbank-feats`
  - `compute-mfcc-feats`
  - `compute-plp-feats`
  - `compute-spectrogram-feats`
 (**NOTE**: We will implement other types of features, e.g., Pitch, ivector, etc., soon.)
 **HINT**: It also supports streaming feature extractors for Fbank, MFCC, and PLP.
 # Usage
@ -184,7 +14,7 @@ Let us first generate a test wave using sox:
 sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
 ```
-**HINT**: Download [test.wav][test_wav].
+**HINT**: Download [test_wav][test_wav].
 [test_wav]: kaldifeat/python/tests/test_data/test.wav
@ -209,8 +39,8 @@ features = fbank(wave)
 ```
 To compute features that are compatible with `Kaldi`, wave samples have to be
-scaled to the range `[-32768, 32768]`. **WARNING**: You don't have to do this if
+scaled to the range `[-32768, 32768]`. WARNING: You don't have to do this if
-you don't care about the compatibility with `Kaldi`.
+you don't care about the compatibility with `Kaldi`.
 The following is an example:
@ -255,7 +85,7 @@ The output is:
 You can see that ``kaldifeat`` produces the same output as `Kaldi` (within some tolerance due to numerical precision).
-**HINT**: Download [test.scp][test_scp] and [test.txt][test_txt].
+**HINT**: Download [test_scp][test_scp] and [test_txt][test_txt].
 [test_scp]: kaldifeat/python/tests/test_data/test.scp
 [test_txt]: kaldifeat/python/tests/test_data/test.txt
@ -273,25 +103,13 @@ fbank = kaldifeat.Fbank(opts)
 features = fbank(wave.to(opts.device))
 ```
-## MFCC, PLP, Spectrogram
+## MFCC
 To compute MFCC features, please replace `kaldifeat.FbankOptions` and `kaldifeat.Fbank`
-with `kaldifeat.MfccOptions` and `kaldifeat.Mfcc`, respectively. The same goes
+with `kaldifeat.MfccOptions` and `kaldifeat.Mfcc`, respectively.
 for `PLP` and `Spectrogram`.
 Please refer to
  - [kaldifeat/python/tests/test_fbank.py](kaldifeat/python/tests/test_fbank.py)
  - [kaldifeat/python/tests/test_mfcc.py](kaldifeat/python/tests/test_mfcc.py)
  - [kaldifeat/python/tests/test_plp.py](kaldifeat/python/tests/test_plp.py)
  - [kaldifeat/python/tests/test_spectrogram.py](kaldifeat/python/tests/test_spectrogram.py)
  - [kaldifeat/python/tests/test_frame_extraction_options.py](kaldifeat/python/tests/test_frame_extraction_options.py)
  - [kaldifeat/python/tests/test_mel_bank_options.py](kaldifeat/python/tests/test_mel_bank_options.py)
  - [kaldifeat/python/tests/test_fbank_options.py](kaldifeat/python/tests/test_fbank_options.py)
  - [kaldifeat/python/tests/test_mfcc_options.py](kaldifeat/python/tests/test_mfcc_options.py)
  - [kaldifeat/python/tests/test_spectrogram_options.py](kaldifeat/python/tests/test_spectrogram_options.py)
  - [kaldifeat/python/tests/test_plp_options.py](kaldifeat/python/tests/test_plp_options.py)
 Please refer to [kaldifeat/python/tests/test_fbank.py](kaldifeat/python/tests/test_fbank.py)
 and [kaldifeat/python/tests/test_mfcc.py](kaldifeat/python/tests/test_mfcc.py)
 for more examples.
 **HINT**: In the examples, you can find that
@ -299,31 +117,33 @@ for more examples.
 - ``kaldifeat`` supports batch processing as well as chunk processing
 - ``kaldifeat`` uses the same options as `Kaldi`'s `compute-fbank-feats` and `compute-mfcc-feats`
-# Usage in other projects
+# Installation
-## icefall
+## From PyPI with pip
-[icefall](https://github.com/k2-fsa/icefall) uses kaldifeat to extract features for a pre-trained model.
+If you install `kaldifeat` using `pip`, it will also install
 PyTorch 1.8.1. If this is not what you want, please install `kaldifeat`
 from source (see below).
-See <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/conformer_ctc/pretrained.py>.
+```bash
 pip install kaldifeat
 ```
-## k2
+## From source
-[k2](https://github.com/k2-fsa/k2) uses kaldifeat's C++ API.
+The following are the commands to compile `kaldifeat` from source.
-
+We assume that you have installed `cmake` and PyTorch.
-See <https://github.com/k2-fsa/k2/blob/v2.0-pre/k2/torch/csrc/features.cu>.
+cmake 3.11 is known to work. Other cmake versions may also work.
-
+PyTorch 1.8.1 is known to work. Other PyTorch versions may also work.
 ## lhotse
 [lhotse](https://github.com/lhotse-speech/lhotse) uses kaldifeat to extract features on GPU.
 See <https://github.com/lhotse-speech/lhotse/blob/master/lhotse/features/kaldifeat.py>.
 ## sherpa
 [sherpa](https://github.com/k2-fsa/sherpa) uses kaldifeat for streaming feature
 extraction.
 See <https://github.com/k2-fsa/sherpa/blob/master/sherpa/bin/pruned_stateless_emformer_rnnt2/decode.py>
 ```bash
 mkdir /some/path
 git clone https://github.com/csukuangfj/kaldifeat.git
 cd kaldifeat
 python setup.py install
 ```
 To test whether `kaldifeat` was installed successfully, you can run:
 ```bash
 python3 -c "import kaldifeat; print(kaldifeat.__version__)"
 ```
--- a/cmake/cmake_extension.py
+++ b/cmake/cmake_extension.py
@ -2,34 +2,19 @@
 import glob
 import os
 import platform
 import shutil
 import sys
 from pathlib import Path
 import setuptools
 import torch
 from setuptools.command.build_ext import build_ext
 def get_pytorch_version():
    """Return the PyTorch version string without any ``+`` suffix.

    For example, ``1.7.1+cuda101`` is returned as ``1.7.1``.
    """
    base_version, _, _ = torch.__version__.partition("+")
    return base_version
 def is_for_pypi():
    """Tell whether the environment variable KALDIFEAT_IS_FOR_PYPI is set.

    Any value counts, including an empty string; only the presence of the
    variable is checked.
    """
    return "KALDIFEAT_IS_FOR_PYPI" in os.environ
 def is_macos():
    """Return True when ``platform.system()`` reports ``"Darwin"``."""
    system_name = platform.system()
    return system_name == "Darwin"
 def is_windows():
    """Return True when ``platform.system()`` reports ``"Windows"``."""
    system_name = platform.system()
    return system_name == "Windows"
 try:
    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
@ -37,14 +22,15 @@ try:
        def finalize_options(self):
            _bdist_wheel.finalize_options(self)
            # In this case, the generated wheel has a name in the form
-            # kaldifeat-xxx-pyxx-none-any.whl
+            # k2-xxx-pyxx-none-any.whl
-            if is_for_pypi() and not is_macos():
+            if is_for_pypi():
                self.root_is_pure = True
            else:
                # The generated wheel has a name ending with
                # -linux_x86_64.whl
                self.root_is_pure = False
 except ImportError:
    bdist_wheel = None
@ -72,67 +58,33 @@ class BuildExtension(build_ext):
        if cmake_args == "":
            cmake_args = "-DCMAKE_BUILD_TYPE=Release"
-        extra_cmake_args = " -Dkaldifeat_BUILD_TESTS=OFF "
+        if make_args == "" and system_make_args == "":
-        extra_cmake_args += f" -DCMAKE_INSTALL_PREFIX={Path(self.build_lib).resolve()}/kaldifeat "  # noqa
+            print("For fast compilation, run:")
-
+            print('export KALDIFEAT_MAKE_ARGS="-j"; python setup.py install')
        major, minor = get_pytorch_version().split(".")[:2]
        print("major, minor", major, minor)
        major = int(major)
        minor = int(minor)
        if major > 2 or (major == 2 and minor >= 1):
            extra_cmake_args += f" -DCMAKE_CXX_STANDARD=17 "
        if "PYTHON_EXECUTABLE" not in cmake_args:
            print(f"Setting PYTHON_EXECUTABLE to {sys.executable}")
            cmake_args += f" -DPYTHON_EXECUTABLE={sys.executable}"
-        cmake_args += extra_cmake_args
+        build_cmd = f"""
            cd {self.build_temp}
-        if is_windows():
+            cmake {cmake_args} {kaldifeat_dir}
-            build_cmd = f"""
+
-                cmake {cmake_args} -B {self.build_temp} -S {kaldifeat_dir}
+
-                cmake --build {self.build_temp} --target _kaldifeat --config Release -- -m
+            make {make_args} _kaldifeat
-                cmake --build {self.build_temp} --target install --config Release -- -m
+        """
-            """
+        print(f"build command is:\n{build_cmd}")
-            print(f"build command is:\n{build_cmd}")
+
-            ret = os.system(
+        ret = os.system(build_cmd)
-                f"cmake {cmake_args} -B {self.build_temp} -S {kaldifeat_dir}"
+        if ret != 0:
            raise Exception(
                "\nBuild kaldifeat failed. Please check the error message.\n"
                "You can ask for help by creating an issue on GitHub.\n"
                "\nClick:\n\thttps://github.com/csukuangfj/kaldifeat/issues/new\n"  # noqa
            )
            if ret != 0:
                raise Exception("Failed to configure kaldifeat")
-            ret = os.system(
+        lib_so = glob.glob(f"{self.build_temp}/lib/*kaldifeat*.so")
-                f"cmake --build {self.build_temp} --target _kaldifeat --config Release -- -m"
+        for so in lib_so:
-            )
+            print(f"Copying {so} to {self.build_lib}/")
-            if ret != 0:
+            shutil.copy(f"{so}", f"{self.build_lib}/")
                raise Exception("Failed to build kaldifeat")
            ret = os.system(
                f"cmake --build {self.build_temp} --target install --config Release -- -m"
            )
            if ret != 0:
                raise Exception("Failed to install kaldifeat")
        else:
            if make_args == "" and system_make_args == "":
                print("For fast compilation, run:")
                print('export KALDIFEAT_MAKE_ARGS="-j"; python setup.py install')
                make_args = " -j4 "
                print("Setting make_args to '-j4'")
            build_cmd = f"""
                cd {self.build_temp}
                cmake {cmake_args} {kaldifeat_dir}
                make {make_args} _kaldifeat install
            """
            print(f"build command is:\n{build_cmd}")
            ret = os.system(build_cmd)
            if ret != 0:
                raise Exception(
                    "\nBuild kaldifeat failed. Please check the error message.\n"
                    "You can ask for help by creating an issue on GitHub.\n"
                    "\nClick:\n\thttps://github.com/csukuangfj/kaldifeat/issues/new\n"  # noqa
                )
--- a/cmake/googletest.cmake
+++ b/cmake/googletest.cmake
@ -18,34 +18,14 @@ function(download_googltest)
    # FetchContent is available since 3.11,
    # we've copied it to ${CMAKE_SOURCE_DIR}/cmake/Modules
    # so that it can be used in lower CMake versions.
-    message(STATUS "Use FetchContent provided by kaldifeat")
+    message(STATUS "Use FetchContent provided by k2")
    list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
  endif()
  include(FetchContent)
-  set(googletest_URL  "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz")
+  set(googletest_URL  "https://github.com/google/googletest/archive/release-1.10.0.tar.gz")
-  set(googletest_URL2 "https://huggingface.co/csukuangfj/k2-cmake-deps/resolve/main/googletest-1.13.0.tar.gz")
+  set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb")
  set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363")
  # If you don't have access to the Internet,
  # please pre-download googletest
  set(possible_file_locations
    $ENV{HOME}/Downloads/googletest-1.13.0.tar.gz
    ${PROJECT_SOURCE_DIR}/googletest-1.13.0.tar.gz
    ${PROJECT_BINARY_DIR}/googletest-1.13.0.tar.gz
    /tmp/googletest-1.13.0.tar.gz
    /star-fj/fangjun/download/github/googletest-1.13.0.tar.gz
  )
  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(googletest_URL  "${f}")
      file(TO_CMAKE_PATH "${googletest_URL}" googletest_URL)
      set(googletest_URL2)
      break()
    endif()
  endforeach()
  set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
  set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
@ -53,15 +33,13 @@ function(download_googltest)
  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
  FetchContent_Declare(googletest
-    URL
+    URL               ${googletest_URL}
      ${googletest_URL}
      ${googletest_URL2}
    URL_HASH          ${googletest_HASH}
  )
  FetchContent_GetProperties(googletest)
  if(NOT googletest_POPULATED)
-    message(STATUS "Downloading googletest from ${googletest_URL}")
+    message(STATUS "Downloading googletest")
    FetchContent_Populate(googletest)
  endif()
  message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")
--- a/cmake/kaldifeatConfig.cmake.in
+++ b/cmake/kaldifeatConfig.cmake.in
@ -1,65 +0,0 @@
 # Findkaldifeat
 # -------------
 #
 # Finds the kaldifeat library
 #
 # This will define the following variables:
 #
 #   KALDIFEAT_FOUND  -- True if the system has the kaldifeat library
 #   KALDIFEAT_INCLUDE_DIRS -- The include directories for kaldifeat
 #   KALDIFEAT_LIBRARIES    -- Libraries to link against
 #   KALDIFEAT_CXX_FLAGS -- Additional (required) compiler flags
 #   KALDIFEAT_TORCH_VERSION_MAJOR  -- The major version of PyTorch used to compile kaldifeat
 #   KALDIFEAT_TORCH_VERSION_MINOR  -- The minor version of PyTorch used to compile kaldifeat
 #   KALDIFEAT_VERSION -- The version of kaldifeat
 #
 # and the following imported targets:
 #
 #   kaldifeat_core
 # This file is modified from pytorch/cmake/TorchConfig.cmake.in
 # Package configuration body: records build-time settings, locates the
 # installed kaldifeat library and exposes it as an imported target.
 #
 # The placeholders between at-signs below are replaced when this template
 # is processed by configure_file().
 set(KALDIFEAT_CXX_FLAGS "@CMAKE_CXX_FLAGS@")
 set(KALDIFEAT_TORCH_VERSION_MAJOR @KALDIFEAT_TORCH_VERSION_MAJOR@)
 set(KALDIFEAT_TORCH_VERSION_MINOR @KALDIFEAT_TORCH_VERSION_MINOR@)
 set(KALDIFEAT_VERSION @kaldifeat_VERSION@)
 # The environment variable KALDIFEAT_INSTALL_PREFIX, when defined, takes
 # precedence over the prefix derived from the location of this file.
 if(DEFINED ENV{KALDIFEAT_INSTALL_PREFIX})
  set(KALDIFEAT_INSTALL_PREFIX $ENV{KALDIFEAT_INSTALL_PREFIX})
 else()
  # Assume we are in <install-prefix>/share/cmake/kaldifeat/kaldifeatConfig.cmake
  # NOTE(review): this overwrites CMAKE_CURRENT_LIST_DIR with the directory
  # of the current list file before walking three levels up to the prefix.
  get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
  get_filename_component(KALDIFEAT_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
 endif()
 set(KALDIFEAT_INCLUDE_DIRS ${KALDIFEAT_INSTALL_PREFIX}/include)
 set(KALDIFEAT_LIBRARIES kaldifeat_core)
 # For every library: search <prefix>/lib and <prefix>/lib64, then declare
 # an imported target named after the library.
 foreach(lib IN LISTS KALDIFEAT_LIBRARIES)
  find_library(location_${lib} ${lib}
    PATHS
    "${KALDIFEAT_INSTALL_PREFIX}/lib"
    "${KALDIFEAT_INSTALL_PREFIX}/lib64"
  )
  # The imported target is declared SHARED everywhere except under MSVC,
  # where it is declared STATIC.
  if(NOT MSVC)
    add_library(${lib} SHARED IMPORTED)
  else()
    add_library(${lib} STATIC IMPORTED)
  endif()
  # Attach the include directory and the library location found above.
  set_target_properties(${lib} PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${KALDIFEAT_INCLUDE_DIRS}"
      IMPORTED_LOCATION "${location_${lib}}"
      CXX_STANDARD 14
  )
  # Consumers linking against the target inherit the compiler flags that
  # were in effect when this template was configured.
  set_property(TARGET ${lib} PROPERTY INTERFACE_COMPILE_OPTIONS @CMAKE_CXX_FLAGS@)
 endforeach()
 # The package is reported as found only if location_kaldifeat_core was
 # resolved by find_library() above.
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(kaldifeat DEFAULT_MSG
  location_kaldifeat_core
 )
--- a/cmake/kaldifeatConfigVersion.cmake.in
+++ b/cmake/kaldifeatConfigVersion.cmake.in
@ -1,12 +0,0 @@
 # This file is modified from pytorch/cmake/TorchConfigVersion.cmake.in
 # Version of this package; the placeholder is filled in when the template
 # is configured.
 set(PACKAGE_VERSION "@kaldifeat_VERSION@")
 # The package is compatible when its version is not older than the
 # requested one, and an exact match when the two versions are equal.
 if(NOT "${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}")
  set(PACKAGE_VERSION_COMPATIBLE TRUE)
  if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}")
    set(PACKAGE_VERSION_EXACT TRUE)
  endif()
 else()
  set(PACKAGE_VERSION_COMPATIBLE FALSE)
 endif()
--- a/cmake/pybind11.cmake
+++ b/cmake/pybind11.cmake
@ -8,39 +8,23 @@ function(download_pybind11)
  include(FetchContent)
-  set(pybind11_URL  "https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.tar.gz")
+  set(pybind11_URL  "https://github.com/pybind/pybind11/archive/v2.6.0.tar.gz")
-  set(pybind11_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pybind11-2.12.0.tar.gz")
+  set(pybind11_HASH "SHA256=90b705137b69ee3b5fc655eaca66d0dc9862ea1759226f7ccd3098425ae69571")
  set(pybind11_HASH "SHA256=bf8f242abd1abcd375d516a7067490fb71abd79519a282d22b6e4d19282185a7")
  # If you don't have access to the Internet,
  # please pre-download pybind11
  set(possible_file_locations
    $ENV{HOME}/Downloads/pybind11-2.12.0.tar.gz
    ${CMAKE_SOURCE_DIR}/pybind11-2.12.0.tar.gz
    ${CMAKE_BINARY_DIR}/pybind11-2.12.0.tar.gz
    /tmp/pybind11-2.12.0.tar.gz
    /star-fj/fangjun/download/github/pybind11-2.12.0.tar.gz
  )
  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(pybind11_URL  "${f}")
      file(TO_CMAKE_PATH "${pybind11_URL}" pybind11_URL)
      set(pybind11_URL2)
      break()
    endif()
  endforeach()
  set(double_quotes "\"")
  set(dollar "\$")
  set(semicolon "\;")
  FetchContent_Declare(pybind11
-    URL
+    URL               ${pybind11_URL}
      ${pybind11_URL}
      ${pybind11_URL2}
    URL_HASH          ${pybind11_HASH}
    PATCH_COMMAND
      sed -i s/\\${double_quotes}-flto\\\\${dollar}/\\${double_quotes}-Xcompiler=-flto${dollar}/g "tools/pybind11Tools.cmake" &&
      sed -i s/${seimcolon}-fno-fat-lto-objects/${seimcolon}-Xcompiler=-fno-fat-lto-objects/g "tools/pybind11Tools.cmake"
  )
  FetchContent_GetProperties(pybind11)
  if(NOT pybind11_POPULATED)
-    message(STATUS "Downloading pybind11 from ${pybind11_URL}")
+    message(STATUS "Downloading pybind11")
    FetchContent_Populate(pybind11)
  endif()
  message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}")
--- a/cmake/torch.cmake
+++ b/cmake/torch.cmake
@ -8,7 +8,6 @@ execute_process(
  OUTPUT_STRIP_TRAILING_WHITESPACE
  OUTPUT_VARIABLE TORCH_DIR
 )
 message(STATUS "TORCH_DIR: ${TORCH_DIR}")
 list(APPEND CMAKE_PREFIX_PATH "${TORCH_DIR}")
 find_package(Torch REQUIRED)
@ -25,14 +24,16 @@ execute_process(
 message(STATUS "PyTorch version: ${TORCH_VERSION}")
-execute_process(
+# Solve the following error for NVCC:
-  COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__.split('.')[0])"
+#   unknown option `-Wall`
-  OUTPUT_STRIP_TRAILING_WHITESPACE
+#
-  OUTPUT_VARIABLE KALDIFEAT_TORCH_VERSION_MAJOR
+# It contains only some -Wno-* flags, so it is OK
 # to set them to empty
 set_property(TARGET torch_cuda
  PROPERTY
    INTERFACE_COMPILE_OPTIONS ""
 )
-
+set_property(TARGET torch_cpu
-execute_process(
+  PROPERTY
-  COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__.split('.')[1])"
+    INTERFACE_COMPILE_OPTIONS ""
  OUTPUT_STRIP_TRAILING_WHITESPACE
  OUTPUT_VARIABLE KALDIFEAT_TORCH_VERSION_MINOR
 )
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@ -1,6 +1,6 @@
 dataclasses
 recommonmark
-sphinx<7.0
+sphinx
 sphinx-autodoc-typehints
 sphinx_rtd_theme
 sphinxcontrib-bibtex
--- a/doc/source/_static/.gitkeep
+++ b/doc/source/_static/.gitkeep
--- a/doc/source/code/test_fbank.py
+++ b/doc/source/code/test_fbank.py
@ -0,0 +1,72 @@
 #!/usr/bin/env python3
 # Copyright      2021  Xiaomi Corporation (authors: Fangjun Kuang)
 import numpy as np
 import soundfile as sf
 import torch
 import kaldifeat
 def read_wave(filename) -> torch.Tensor:
    """Load the samples of a sound file into a tensor.

    The samples are read as float32 and multiplied by 32768, following
    the approach used in Kaldi. This scaling is not required if
    compatibility with Kaldi does not matter.

    Args:
      filename:
        Filename of a sound file. Its sampling rate must be 16000.

    Returns:
      Return a tensor containing the scaled audio samples
      (1-D, presumably for a single-channel file).
    """
    with sf.SoundFile(filename) as wave_file:
        # Only 16 kHz input is accepted.
        assert wave_file.samplerate == 16000
        samples = wave_file.read(dtype=np.float32, always_2d=False)
    samples *= 32768
    return torch.from_numpy(samples)
 def test_fbank():
    """Check batched, per-wave and per-frame fbank computation for agreement.

    Runs on the first CUDA device when one is available, otherwise on CPU.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda", 0)
    else:
        device = torch.device("cpu")

    # Read both files first, then move the samples to the target device.
    loaded = [
        read_wave(path)
        for path in ("test_data/test.wav", "test_data/test2.wav")
    ]
    first_wave, second_wave = [wave.to(device) for wave in loaded]

    options = kaldifeat.FbankOptions()
    options.frame_opts.dither = 0
    options.device = device
    extractor = kaldifeat.Fbank(options)

    # Batch mode: a list of waves in, a list of feature tensors out.
    batch_features = extractor([first_wave, second_wave])
    assert isinstance(batch_features, list), f"{type(batch_features)}"
    assert len(batch_features) == 2

    # Single-wave mode must agree with the corresponding batch entries.
    first_features = extractor(first_wave)
    second_features = extractor(second_wave)
    assert torch.allclose(batch_features[0], first_features)
    assert torch.allclose(batch_features[1], second_features)

    # Features computed for individual frames must match the rows of the
    # full result for the same wave.
    frames = extractor.convert_samples_to_frames(first_wave)
    frame_indices = (1, 10)
    per_frame = [extractor.compute(frames[i]) for i in frame_indices]
    for i, frame_feature in zip(frame_indices, per_frame):
        assert torch.allclose(first_features[i], frame_feature)
 if __name__ == "__main__":
    test_fbank()
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@ -59,7 +59,7 @@ templates_path = ["_templates"]
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ["images/*.md"]
+exclude_patterns = []
 source_suffix = {
    ".rst": "restructuredtext",
@ -102,35 +102,3 @@ html_theme_options = {
    "prev_next_buttons_location": "bottom",
    "style_external_links": True,
 }
 rst_epilog = """
 .. _kaldifeat: https://github.com/csukuangfj/kaldifeat
 .. _Kaldi: https://github.com/kaldi-asr/kaldi
 .. _PyTorch: https://pytorch.org/
 .. _kaldifeat.Fbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L10
 .. _kaldifeat.Mfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L10
 .. _kaldifeat.Plp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L10
 .. _kaldifeat.Spectrogram: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/spectrogram.py#L9
 .. _kaldifeat.OnlineFbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L16
 .. _kaldifeat.OnlineMfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L16
 .. _kaldifeat.OnlinePlp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L16
 .. _compute-fbank-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-fbank-feats.cc
 .. _compute-mfcc-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-mfcc-feats.cc
 .. _compute-plp-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-plp-feats.cc
 .. _compute-spectrogram-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-spectrogram-feats.cc
 .. _kaldi::OnlineFbank: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L160
 .. _kaldi::OnlineMfcc: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L158
 .. _kaldi::OnlinePlp: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L159
 .. _kaldifeat.FbankOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-fbank.h#L19
 .. _kaldi::FbankOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.h#L41
 .. _kaldifeat.MfccOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-mfcc.h#L22
 .. _kaldi::MfccOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-mfcc.h#L38
 .. _kaldifeat.PlpOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-plp.h#L24
 .. _kaldi::PlpOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-plp.h#L42
 .. _kaldifeat.SpectrogramOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-spectrogram.h#L18
 .. _kaldi::SpectrogramOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-spectrogram.h#L38
 .. _kaldifeat.FrameExtractionOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-window.h#L30
 .. _kaldi::FrameExtractionOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.h#L35
 .. _kaldifeat.MelBanksOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/mel-computations.h#L17
 .. _kaldi::MelBanksOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/mel-computations.h#L43
 """
--- a/doc/source/images/README.md
+++ b/doc/source/images/README.md
@ -1,8 +0,0 @@
 ## File description
 <https://shields.io/> is used to create the following files:
 - ./os-green.svg
 - ./python_ge_3.6-blue.svg
 - ./cuda_ge_10.1-orange.svg
 - ./pytorch_ge_1.5.0-green.svg
--- a/doc/source/images/cuda_ge_10.1-orange.svg
+++ b/doc/source/images/cuda_ge_10.1-orange.svg
@ -1 +0,0 @@
 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="94" height="20" role="img" aria-label="cuda: &gt;= 10.1"><title>cuda: &gt;= 10.1</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="94" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="37" height="20" fill="#555"/><rect x="37" width="57" height="20" fill="#fe7d37"/><rect width="94" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="195" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="270">cuda</text><text x="195" y="140" transform="scale(.1)" fill="#fff" textLength="270">cuda</text><text aria-hidden="true" x="645" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">&gt;= 10.1</text><text x="645" y="140" transform="scale(.1)" fill="#fff" textLength="470">&gt;= 10.1</text></g></svg>
--- a/doc/source/images/os-green.svg
+++ b/doc/source/images/os-green.svg
@ -1 +0,0 @@
 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="176" height="20" role="img" aria-label="os: Linux | macOS | Windows"><title>os: Linux | macOS | Windows</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="176" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="153" height="20" fill="#97ca00"/><rect width="176" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">os</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">os</text><text aria-hidden="true" x="985" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="1430">Linux | macOS | Windows</text><text x="985" y="140" transform="scale(.1)" fill="#fff" textLength="1430">Linux | macOS | Windows</text></g></svg>
--- a/doc/source/images/python_ge_3.6-blue.svg
+++ b/doc/source/images/python_ge_3.6-blue.svg
@ -1 +0,0 @@
 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: &gt;= 3.6"><title>python: &gt;= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">&gt;= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">&gt;= 3.6</text></g></svg>
--- a/doc/source/images/pytorch_ge_1.5.0-green.svg
+++ b/doc/source/images/pytorch_ge_1.5.0-green.svg
@ -1 +0,0 @@
 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="112" height="20" role="img" aria-label="pytorch: &gt;= 1.5.0"><title>pytorch: &gt;= 1.5.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="112" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="51" height="20" fill="#555"/><rect x="51" width="61" height="20" fill="#97ca00"/><rect width="112" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="265" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="410">pytorch</text><text x="265" y="140" transform="scale(.1)" fill="#fff" textLength="410">pytorch</text><text aria-hidden="true" x="805" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">&gt;= 1.5.0</text><text x="805" y="140" transform="scale(.1)" fill="#fff" textLength="510">&gt;= 1.5.0</text></g></svg>
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@ -6,11 +6,19 @@
 kaldifeat
 =========
 `kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ implements
 feature extraction algorithms **compatible** with kaldi using PyTorch, supporting CUDA
 as well as autograd.
 Currently, only fbank features are supported.
 It can produce the same feature output as ``compute-fbank-feats`` (from kaldi)
 when given the same options.
 .. toctree::
   :maxdepth: 2
-   :caption: Contents
+   :caption: Contents:
-   intro
+   installation
-   installation/index
+   usage
   usage/index
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@ -0,0 +1,54 @@
 Installation
 ============
 .. _from source:
 Install kaldifeat from source
 -----------------------------
 You have to install ``cmake`` and ``PyTorch`` first.
  - ``cmake`` 3.11 is known to work. Other CMake versions may also work.
  - ``PyTorch`` 1.8.1 is known to work. Other PyTorch versions may also work.
  - Python >= 3.6
 The commands to install ``kaldifeat`` from source are:
 .. code-block:: bash
  git clone https://github.com/csukuangfj/kaldifeat
  cd kaldifeat
  python3 setup.py install
 To test that you have installed ``kaldifeat`` successfully, please run:
 .. code-block:: bash
  python3 -c "import kaldifeat; print(kaldifeat.__version__)"
 It should print the version, e.g., ``1.0``.
 Install kaldifeat from PyPI
 ---------------------------
 The pre-built ``kaldifeat`` hosted on PyPI uses PyTorch 1.8.1.
 If you install ``kaldifeat`` using pip, it will replace your locally
 installed PyTorch automatically with PyTorch 1.8.1.
 If you don't want this to happen, please `Install kaldifeat from source`_.
 The command to install ``kaldifeat`` from PyPI is:
 .. code-block:: bash
  pip install kaldifeat
 To test that you have installed ``kaldifeat`` successfully, please run:
 .. code-block:: bash
  python3 -c "import kaldifeat; print(kaldifeat.__version__)"
 It should print the version, e.g., ``1.0``.
--- a/doc/source/installation/faq.rst
+++ b/doc/source/installation/faq.rst
@ -1,48 +0,0 @@
 FAQs
 ====
 How to install a CUDA version of kaldifeat from source
 ------------------------------------------------------
 You need to first install a CUDA version of `PyTorch`_ and then install `kaldifeat`_.
 .. note::
   You can use a CUDA version of `kaldifeat`_ on machines with no GPUs.
 How to install a CPU version of kaldifeat from source
 -----------------------------------------------------
 You need to first install a CPU version of `PyTorch`_ and then install `kaldifeat`_.
 How to fix `Caffe2: Cannot find cuDNN library`
 ----------------------------------------------
 .. code-block::
  Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN
  libraries.  Please set the proper cuDNN prefixes and / or install cuDNN.
 You will have such an error when you want to install a CUDA version of `kaldifeat`_
 by ``pip install kaldifeat`` or from source.
 You need to first install cuDNN. Assume you have installed cuDNN to the
 path ``/path/to/cudnn``. You can fix the error by using ``one`` of the following
 commands.
 (1) Fix for installation using ``pip install``
 .. code-block:: bash
    export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
    pip install --verbose kaldifeat
 (2) Fix for installation from source
 .. code-block:: bash
    mkdir /some/path
    git clone https://github.com/csukuangfj/kaldifeat.git
    cd kaldifeat
    export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
    python setup.py install
--- a/doc/source/installation/from_source.rst
+++ b/doc/source/installation/from_source.rst
@ -1,47 +0,0 @@
 .. _from source:
 Install kaldifeat from source
 =============================
 You have to install ``cmake`` and `PyTorch`_ first.
  - ``cmake`` 3.11 is known to work. Other CMake versions may also work.
  - `PyTorch`_ >= 1.5.0  is known to work. Other PyTorch versions may also work.
  - Python >= 3.6
  - A compiler that supports C++ 14
 The commands to install `kaldifeat`_ from source are:
 .. code-block:: bash
  git clone https://github.com/csukuangfj/kaldifeat
  cd kaldifeat
  python3 setup.py install
 To test that you have installed `kaldifeat`_ successfully, please run:
 .. code-block:: bash
  python3 -c "import kaldifeat; print(kaldifeat.__version__)"
 It should print the version, e.g., ``1.0``.
 .. _from PyPI:
 Install kaldifeat from PyPI
 ---------------------------
 The command to install `kaldifeat`_ from PyPI is:
 .. code-block:: bash
  pip install --verbose kaldifeat
 To test that you have installed `kaldifeat`_ successfully, please run:
 .. code-block:: bash
  python3 -c "import kaldifeat; print(kaldifeat.__version__)"
 It should print the version, e.g., ``1.0``.
--- a/doc/source/installation/from_wheels.rst
+++ b/doc/source/installation/from_wheels.rst
@ -1,139 +0,0 @@
 From pre-compiled wheels (Recommended)
 =======================================
 You can find pre-compiled wheels at
  - CPU wheels: `<https://csukuangfj.github.io/kaldifeat/cpu.html>`_
  - CUDA wheels: `<https://csukuangfj.github.io/kaldifeat/cuda.html>`_
 We give a few examples below to show you how to install `kaldifeat`_ from
 pre-compiled wheels.
 .. hint::
   The following lists only some examples. We suggest that you always select the
   latest version of ``kaldifeat``.
 Linux (CPU)
 -----------
 Suppose you want to install the following wheel:
 .. code-block:: bash
   https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
 you can use one of the following methods:
 .. code-block:: bash
   # method 1
   pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
   pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
   # method 2
   pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
   wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/ubuntu-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
 Windows (CPU)
 --------------
 Suppose you want to install the following wheel:
 .. code-block:: bash
   https://huggingface.co/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
 you can use one of the following methods:
 .. code-block:: bash
   # method 1
   pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
   pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
   # method 2
   pip install torch==2.4.0+cpu -f https://download.pytorch.org/whl/torch/
   wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/windows-cpu/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
   pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp312-cp312-win_amd64.whl
 macOS (CPU)
 -----------
 Suppose you want to install the following wheel:
 .. code-block:: bash
   https://huggingface.co/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
 you can use one of the following methods:
 .. code-block:: bash
   # method 1
   pip install torch==2.4.0
   pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # pip install kaldifeat==1.25.4.dev20240725+cpu.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cpu-cn.html
   # method 2
   pip install torch==2.4.0 -f https://download.pytorch.org/whl/torch/
   wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/macos/kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
   pip install ./kaldifeat-1.25.4.dev20240725+cpu.torch2.4.0-cp311-cp311-macosx_11_0_arm64.whl
 Linux (CUDA)
 ------------
 Suppose you want to install the following wheel:
 .. code-block:: bash
   https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
 you can use one of the following methods:
 .. code-block:: bash
   # method 1
   pip install torch==2.4.0+cu124 -f https://download.pytorch.org/whl/torch/
   pip install kaldifeat==1.25.4.dev20240725+cuda12.4.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cuda.html
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # pip install kaldifeat==1.25.4.dev20240725+cuda12.4.torch2.4.0 -f https://csukuangfj.github.io/kaldifeat/cuda-cn.html
   # method 2
   pip install torch==2.4.0+cu124 -f https://download.pytorch.org/whl/torch/
   wget https://huggingface.co/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   # For users from China
   # 中国国内用户，如果访问不了 huggingface, 请使用
   # wget https://hf-mirror.com/csukuangfj/kaldifeat/resolve/main/ubuntu-cuda/kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
   pip install ./kaldifeat-1.25.4.dev20240725+cuda12.4.torch2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
--- a/doc/source/installation/index.rst
+++ b/doc/source/installation/index.rst
@ -1,11 +0,0 @@
 Installation
 ============
 .. toctree::
   :maxdepth: 3
   ./from_wheels.rst
   ./from_source.rst
   ./faq.rst
--- a/doc/source/intro.rst
+++ b/doc/source/intro.rst
@ -1,103 +0,0 @@
 Introduction
 ============
 `kaldifeat`_ implements
 speech feature extraction algorithms **compatible** with `Kaldi`_ using `PyTorch`_,
 supporting CUDA as well as autograd.
 `kaldifeat`_ has the following features:
  - Fully compatible with `Kaldi`_
    .. note::
      The underlying C++ code is copied & modified from `Kaldi`_ directly.
      It is rewritten with `PyTorch`_ C++ APIs.
  - Provide not only ``C++ APIs`` but also ``Python APIs``
    .. note::
      You can access `kaldifeat`_ from ``Python``.
  - Support autograd
  - Support ``CUDA`` and ``CPU``
    .. note::
      You can use CUDA for feature extraction.
  - Support ``online`` (i.e., ``streaming``) and ``offline`` (i.e., ``non-streaming``)
    feature extraction
  - Support chunk-based processing
    .. note::
      This is especially useful if you want to process audios that are several
      hours long, which may cause OOM if you send them for computation at once.
      With chunk-based processing, you can process audios of arbitrary length.
  - Support batch processing
    .. note::
      With `kaldifeat`_ you can extract features for a batch of audios.
 .. see https://sublime-and-sphinx-guide.readthedocs.io/en/latest/tables.html
 Currently implemented speech features and their counterparts in `Kaldi`_ are
 listed in the following table.
 .. list-table:: Supported speech features
   :widths: 50 50
   :header-rows: 1
   * - Supported speech features
     - Counterpart in `Kaldi`_
   * - `kaldifeat.Fbank`_
     - `compute-fbank-feats`_
   * - `kaldifeat.Mfcc`_
     - `compute-mfcc-feats`_
   * - `kaldifeat.Plp`_
     - `compute-plp-feats`_
   * - `kaldifeat.Spectrogram`_
     - `compute-spectrogram-feats`_
   * - `kaldifeat.OnlineFbank`_
     - `kaldi::OnlineFbank`_
   * - `kaldifeat.OnlineMfcc`_
     - `kaldi::OnlineMfcc`_
   * - `kaldifeat.OnlinePlp`_
     - `kaldi::OnlinePlp`_
 Each feature computer needs an option. The following table lists the options
 for each computer and the corresponding options in `Kaldi`_.
 .. hint::
   Note that we reuse the parameter names from `Kaldi`_.
   Also, both online feature computers and offline feature computers share the
   same option.
 .. list-table:: Feature computer options
   :widths: 50 50
   :header-rows: 1
   * - Options in `kaldifeat`_
     - Corresponding options in `Kaldi`_
   * - `kaldifeat.FbankOptions`_
     - `kaldi::FbankOptions`_
   * - `kaldifeat.MfccOptions`_
     - `kaldi::MfccOptions`_
   * - `kaldifeat.PlpOptions`_
     - `kaldi::PlpOptions`_
   * - `kaldifeat.SpectrogramOptions`_
     - `kaldi::SpectrogramOptions`_
   * - `kaldifeat.FrameExtractionOptions`_
     - `kaldi::FrameExtractionOptions`_
   * - `kaldifeat.MelBanksOptions`_
     - `kaldi::MelBanksOptions`_
 Read more to learn how to install `kaldifeat`_ and how to use each feature
 computer.
--- a/doc/source/usage.rst
+++ b/doc/source/usage.rst
@ -0,0 +1,212 @@
 Usage
 =====
 Let us first see the help message of kaldi's ``compute-fbank-feats``:
 .. code-block:: bash
    $ compute-fbank-feats
    Create Mel-filter bank (FBANK) feature files.
    Usage:  compute-fbank-feats [options...] <wav-rspecifier> <feats-wspecifier>
    Options:
      --allow-downsample          : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
      --allow-upsample            : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
      --blackman-coeff            : Constant coefficient for generalized Blackman window. (float, default = 0.42)
      --channel                   : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
      --debug-mel                 : Print out debugging information for mel bin computation (bool, default = false)
      --dither                    : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
      --energy-floor              : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0.  Suggested values: 0.1 or 1.0 (float, default = 0)
      --frame-length              : Frame length in milliseconds (float, default = 25)
      --frame-shift               : Frame shift in milliseconds (float, default = 10)
      --high-freq                 : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
      --htk-compat                : If true, put energy last.  Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
      --low-freq                  : Low cutoff frequency for mel bins (float, default = 20)
      --max-feature-vectors       : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
      --min-duration              : Minimum duration of segments to process (in seconds). (float, default = 0)
      --num-mel-bins              : Number of triangular mel-frequency bins (int, default = 23)
      --output-format             : Format of the output files [kaldi, htk] (string, default = "kaldi")
      --preemphasis-coefficient   : Coefficient for use in signal preemphasis (float, default = 0.97)
      --raw-energy                : If true, compute energy before preemphasis and windowing (bool, default = true)
      --remove-dc-offset          : Subtract mean from waveform on each frame (bool, default = true)
      --round-to-power-of-two     : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
      --sample-frequency          : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
      --snip-edges                : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length.  If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
      --subtract-mean             : Subtract mean of each feature file [CMS]; not recommended to do it this way.  (bool, default = false)
      --use-energy                : Add an extra dimension with energy to the FBANK output. (bool, default = false)
      --use-log-fbank             : If true, produce log-filterbank, else produce linear. (bool, default = true)
      --use-power                 : If true, use power, else use magnitude. (bool, default = true)
      --utt2spk                   : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
      --vtln-high                 : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
      --vtln-low                  : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
      --vtln-map                  : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
      --vtln-warp                 : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
      --window-type               : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
      --write-utt2dur             : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
    Standard options:
      --config                    : Configuration file to read (this option may be repeated) (string, default = "")
      --help                      : Print out usage message (bool, default = false)
      --print-args                : Print the command line arguments (to stderr) (bool, default = true)
      --verbose                   : Verbose level (higher->more logging) (int, default = 0)
 FbankOptions
 ------------
 ``kaldifeat`` reuses the same options from kaldi's ``compute-fbank-feats``.
 The following shows the default values of ``kaldifeat.FbankOptions``:
 .. code-block:: python
  >>> import kaldifeat
  >>> fbank_opts = kaldifeat.FbankOptions()
  >>> print(fbank_opts)
  frame_opts:
  samp_freq: 16000
  frame_shift_ms: 10
  frame_length_ms: 25
  dither: 1
  preemph_coeff: 0.97
  remove_dc_offset: 1
  window_type: povey
  round_to_power_of_two: 1
  blackman_coeff: 0.42
  snip_edges: 1
  mel_opts:
  num_bins: 23
  low_freq: 20
  high_freq: 0
  vtln_low: 100
  vtln_high: -500
  debug_mel: 0
  htk_mode: 0
  use_energy: 0
  energy_floor: 0
  raw_energy: 1
  htk_compat: 0
  use_log_fbank: 1
  use_power: 1
  device: cpu
 It consists of three parts:
  - ``frame_opts``
    Options in this part are accessed by ``frame_opts.xxx``. That is, to access
    the sample rate, you use:
      .. code-block:: python
        >>> fbank_opts = kaldifeat.FbankOptions()
        >>> print(fbank_opts.frame_opts.samp_freq)
        16000.0
  - ``mel_opts``
    Options in this part are accessed by ``mel_opts.xxx``. That is, to access
    the number of mel bins, you use:
      .. code-block:: python
        >>> fbank_opts = kaldifeat.FbankOptions()
        >>> print(fbank_opts.mel_opts.num_bins)
        23
  - fbank related
    Options in this part are accessed directly. That is, to access the device
    field, you use:
      .. code-block:: python
        >>> print(fbank_opts.device)
        cpu
        >>> fbank_opts.device = 'cuda:0'
        >>> print(fbank_opts.device)
        cuda:0
        >>> import torch
        >>> fbank_opts.device = torch.device('cuda', 0)
        >>> print(fbank_opts.device)
        cuda:0
 To change the sample rate to 8000, you can use:
 .. code-block:: python
  >>> fbank_opts = kaldifeat.FbankOptions()
  >>> print(fbank_opts.frame_opts.samp_freq)
  16000.0
  >>> fbank_opts.frame_opts.samp_freq = 8000
  >>> print(fbank_opts.frame_opts.samp_freq)
  8000.0
 To change ``snip_edges`` to ``False``, you can use:
 .. code-block:: python
  >>> fbank_opts.frame_opts.snip_edges = False
  >>> print(fbank_opts.frame_opts.snip_edges)
  False
 To change the number of mel bins to 80, you can use:
 .. code-block:: python
  >>> print(fbank_opts.mel_opts.num_bins)
  23
  >>> fbank_opts.mel_opts.num_bins = 80
  >>> print(fbank_opts.mel_opts.num_bins)
  80
 To change the device to ``cuda``, you can use:
 .. code-block:: python
  >>> fbank_opts.device = 'cuda:0'
  >>> print(fbank_opts.device)
  cuda:0
 Fbank
 -----
 The following shows how to use ``kaldifeat.Fbank`` to compute
 the fbank features of sound files.
 First, let us generate two sound files using ``sox``:
 .. code-block:: bash
  # generate a wav of 1.2 seconds, containing a sine-wave
  # swept from 300 Hz to 3300 Hz
  sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
  # another sound file with 0.5 seconds
  sox -n -r 16000 -b 16 test2.wav synth 0.5 sine 300-3300
 .. hint::
  You can find the above two files by visiting the following two links:
    - `test.wav <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_data/test.wav>`_
    - `test2.wav <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_data/test2.wav>`_
 The `following code <https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank.py>`_
 shows the usage of ``kaldifeat.Fbank``.
 It shows:
  - How to read a sound file. Note that audio samples are scaled to the range [-32768, 32768].
    The intention is to produce the same output as kaldi. You don't need to scale it if
    you don't care about the compatibility with kaldi
  - ``kaldifeat.Fbank`` supports CUDA as well as CPU
  - ``kaldifeat.Fbank`` supports processing sound files in a batch as well as accepting
    a single sound file
 .. literalinclude:: ./code/test_fbank.py
   :caption: Demo of ``kaldifeat.Fbank``
   :language: python
--- a/doc/source/usage/code/compute-fbank-feats-help.txt
+++ b/doc/source/usage/code/compute-fbank-feats-help.txt
@ -1,46 +0,0 @@
 compute-fbank-feats 
 Create Mel-filter bank (FBANK) feature files.
 Usage:  compute-fbank-feats [options...] <wav-rspecifier> <feats-wspecifier>
 Options:
  --allow-downsample          : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
  --allow-upsample            : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
  --blackman-coeff            : Constant coefficient for generalized Blackman window. (float, default = 0.42)
  --channel                   : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
  --debug-mel                 : Print out debugging information for mel bin computation (bool, default = false)
  --dither                    : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
  --energy-floor              : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0.  Suggested values: 0.1 or 1.0 (float, default = 0)
  --frame-length              : Frame length in milliseconds (float, default = 25)
  --frame-shift               : Frame shift in milliseconds (float, default = 10)
  --high-freq                 : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
  --htk-compat                : If true, put energy last.  Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
  --low-freq                  : Low cutoff frequency for mel bins (float, default = 20)
  --max-feature-vectors       : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
  --min-duration              : Minimum duration of segments to process (in seconds). (float, default = 0)
  --num-mel-bins              : Number of triangular mel-frequency bins (int, default = 23)
  --output-format             : Format of the output files [kaldi, htk] (string, default = "kaldi")
  --preemphasis-coefficient   : Coefficient for use in signal preemphasis (float, default = 0.97)
  --raw-energy                : If true, compute energy before preemphasis and windowing (bool, default = true)
  --remove-dc-offset          : Subtract mean from waveform on each frame (bool, default = true)
  --round-to-power-of-two     : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
  --sample-frequency          : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
  --snip-edges                : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length.  If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
  --subtract-mean             : Subtract mean of each feature file [CMS]; not recommended to do it this way.  (bool, default = false)
  --use-energy                : Add an extra dimension with energy to the FBANK output. (bool, default = false)
  --use-log-fbank             : If true, produce log-filterbank, else produce linear. (bool, default = true)
  --use-power                 : If true, use power, else use magnitude. (bool, default = true)
  --utt2spk                   : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
  --vtln-high                 : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
  --vtln-low                  : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
  --vtln-map                  : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
  --vtln-warp                 : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
  --window-type               : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
  --write-utt2dur             : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
 Standard options:
  --config                    : Configuration file to read (this option may be repeated) (string, default = "")
  --help                      : Print out usage message (bool, default = false)
  --print-args                : Print the command line arguments (to stderr) (bool, default = true)
  --verbose                   : Verbose level (higher->more logging) (int, default = 0)
--- a/doc/source/usage/code/fbank_options-1.txt
+++ b/doc/source/usage/code/fbank_options-1.txt
@ -1,65 +0,0 @@
 $ python3
 Python 3.8.0 (default, Oct 28 2019, 16:14:01)
 [GCC 8.3.0] on linux
 Type "help", "copyright", "credits" or "license" for more information.
 >>> import kaldifeat
 >>> opts = kaldifeat.FbankOptions()
 >>> print(opts)
 frame_opts:
 samp_freq: 16000
 frame_shift_ms: 10
 frame_length_ms: 25
 dither: 1
 preemph_coeff: 0.97
 remove_dc_offset: 1
 window_type: povey
 round_to_power_of_two: 1
 blackman_coeff: 0.42
 snip_edges: 1
 max_feature_vectors: -1
 mel_opts:
 num_bins: 23
 low_freq: 20
 high_freq: 0
 vtln_low: 100
 vtln_high: -500
 debug_mel: 0
 htk_mode: 0
 use_energy: 0
 energy_floor: 0
 raw_energy: 1
 htk_compat: 0
 use_log_fbank: 1
 use_power: 1
 device: cpu
 >>> print(opts.dither)
 Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
 AttributeError: '_kaldifeat.FbankOptions' object has no attribute 'dither'
 >>>
 >>> print(opts.frame_opts.dither)
 1.0
 >>> opts.frame_opts.dither = 0 # disable dither
 >>> print(opts.frame_opts.dither)
 0.0
 >>> import torch
 >>> print(opts.device)
 cpu
 >>> opts.device = 'cuda:0'
 >>> print(opts.device)
 cuda:0
 >>> opts.device = torch.device('cuda', 1)
 >>> print(opts.device)
 cuda:1
 >>> opts.device = 'cpu'
 >>> print(opts.device)
 cpu
 >>> print(opts.mel_opts.num_bins)
 23
 >>> opts.mel_opts.num_bins = 80
 >>> print(opts.mel_opts.num_bins)
 80
--- a/doc/source/usage/code/test_fbank_options.py
+++ b/doc/source/usage/code/test_fbank_options.py
@ -1 +0,0 @@
 ../../../../kaldifeat/python/tests/test_fbank_options.py
--- a/doc/source/usage/fbank.rst
+++ b/doc/source/usage/fbank.rst
@ -1,3 +0,0 @@
 kaldifeat.Fbank
 ===============
--- a/doc/source/usage/fbank_options.rst
+++ b/doc/source/usage/fbank_options.rst
@ -1,51 +0,0 @@
 kaldifeat.FbankOptions
 ======================
 If you want to construct an instance of `kaldifeat.Fbank`_ or
 `kaldifeat.OnlineFbank`_, you have to provide an instance of
 `kaldifeat.FbankOptions`_.
 The following code shows how to construct an instance of `kaldifeat.FbankOptions`_.
 .. literalinclude:: ./code/fbank_options-1.txt
   :caption: Usage of `kaldifeat.FbankOptions`_
   :emphasize-lines: 6,8,22,37
 Note that we reuse the same option names as `compute-fbank-feats`_ from `Kaldi`_:
 .. code-block:: bash
   $ compute-fbank-feats --help
 .. literalinclude:: ./code/compute-fbank-feats-help.txt
   :caption: Output of ``compute-fbank-feats --help``
 Please refer to the output of ``compute-fbank-feats --help`` for the meaning
 of each field of `kaldifeat.FbankOptions`_.
 One thing worth noting is that `kaldifeat.FbankOptions`_ has a field ``device``,
 which is an instance of ``torch.device``. You can assign it either a string, e.g.,
 ``"cpu"`` or ``"cuda:0"``, or an instance of ``torch.device``, e.g., ``torch.device("cpu")`` or
 ``torch.device("cuda", 1)``.
 .. hint::
   You can use this field to control whether the feature computer
   constructed from it performs computation on CPU or CUDA.
 .. caution::
   If you use a CUDA device, make sure that you have installed a CUDA version
   of `PyTorch`_.
 Example usage
 -------------
 The following code from
 `<https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank_options.py>`_
 demonstrates the usage of `kaldifeat.FbankOptions`_:
 .. literalinclude:: ./code/test_fbank_options.py
   :caption: Example usage of `kaldifeat.FbankOptions`_
   :language: python
--- a/doc/source/usage/index.rst
+++ b/doc/source/usage/index.rst
@ -1,11 +0,0 @@
 Usage
 =====
 This section describes how to use feature computers in `kaldifeat`_.
 .. toctree::
   :maxdepth: 2
   fbank_options
   fbank
   online_fbank
--- a/doc/source/usage/online_fbank.rst
+++ b/doc/source/usage/online_fbank.rst
@ -1,3 +0,0 @@
 kaldifeat.OnlineFbank
 =====================
--- a/get_version.py
+++ b/get_version.py
@ -1,106 +0,0 @@
 #!/usr/bin/env python3
 import datetime
 import os
 import platform
 import re
 import shutil
 import torch
 def is_macos():
    return platform.system() == "Darwin"
 def is_windows():
    return platform.system() == "Windows"
 def with_cuda():
    if shutil.which("nvcc") is None:
        return False
    if is_macos():
        return False
    return True
 def get_pytorch_version():
    # if it is 1.7.1+cuda101, then strip +cuda101
    return torch.__version__.split("+")[0]
 def get_cuda_version():
    from torch.utils import collect_env
    running_cuda_version = collect_env.get_running_cuda_version(collect_env.run)
    cuda_version = torch.version.cuda
    if running_cuda_version is not None and cuda_version is not None:
        assert cuda_version in running_cuda_version, (
            f"PyTorch is built with CUDA version: {cuda_version}.\n"
            f"The current running CUDA version is: {running_cuda_version}"
        )
    return cuda_version
 def is_for_pypi():
    ans = os.environ.get("KALDIFEAT_IS_FOR_PYPI", None)
    return ans is not None
 def is_stable():
    ans = os.environ.get("KALDIFEAT_IS_STABLE", None)
    return ans is not None
 def is_for_conda():
    ans = os.environ.get("KALDIFEAT_IS_FOR_CONDA", None)
    return ans is not None
 def get_package_version():
    # Set a default CUDA version here so that `pip install kaldifeat`
    # uses the default CUDA version.
    #
    default_cuda_version = "10.1"  # CUDA 10.1
    if with_cuda():
        cuda_version = get_cuda_version()
        if is_for_pypi() and default_cuda_version == cuda_version:
            cuda_version = ""
            pytorch_version = ""
            local_version = ""
        else:
            cuda_version = f"+cuda{cuda_version}"
            pytorch_version = get_pytorch_version()
            local_version = f"{cuda_version}.torch{pytorch_version}"
    else:
        pytorch_version = get_pytorch_version()
        local_version = f"+cpu.torch{pytorch_version}"
    if is_for_conda():
        local_version = ""
    if is_for_pypi() and is_macos():
        local_version = ""
    with open("CMakeLists.txt") as f:
        content = f.read()
    latest_version = re.search(r"set\(kaldifeat_VERSION (.*)\)", content).group(
        1
    )
    latest_version = latest_version.strip('"')
    if not is_stable():
        dt = datetime.datetime.utcnow()
        package_version = f"{latest_version}.dev{dt.year}{dt.month:02d}{dt.day:02d}{local_version}"
    else:
        package_version = f"{latest_version}"
    return package_version
 if __name__ == "__main__":
    print(get_package_version())
--- a/kaldifeat/CMakeLists.txt
+++ b/kaldifeat/CMakeLists.txt
@ -1,4 +1,2 @@
 add_subdirectory(csrc)
-if(kaldifeat_BUILD_PYMODULE)
+add_subdirectory(python)
  add_subdirectory(python)
 endif()
--- a/kaldifeat/csrc/CMakeLists.txt
+++ b/kaldifeat/csrc/CMakeLists.txt
@ -2,33 +2,15 @@
 set(kaldifeat_srcs
  feature-fbank.cc
  feature-functions.cc
  feature-mfcc.cc
  feature-plp.cc
  feature-spectrogram.cc
  feature-window.cc
  matrix-functions.cc
  mel-computations.cc
  online-feature.cc
  whisper-fbank.cc
 )
-add_library(kaldifeat_core ${kaldifeat_srcs})
+add_library(kaldifeat_core SHARED ${kaldifeat_srcs})
 target_link_libraries(kaldifeat_core PUBLIC ${TORCH_LIBRARIES})
 target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MAJOR=${KALDIFEAT_TORCH_VERSION_MAJOR})
 target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MINOR=${KALDIFEAT_TORCH_VERSION_MINOR})
 if(APPLE)
  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
  )
  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
  target_link_libraries(kaldifeat_core PUBLIC "-L ${PYTHON_SITE_PACKAGE_DIR}/../..")
 endif()
 add_executable(test_kaldifeat test_kaldifeat.cc)
 target_link_libraries(test_kaldifeat PRIVATE kaldifeat_core)
@ -42,52 +24,19 @@ function(kaldifeat_add_test source)
      gtest_main
  )
  # NOTE: We set the working directory here so that
  # it works also on windows. The reason is that
  # the required DLLs are inside ${TORCH_DIR}/lib
  # and they can be found by the exe if the current
  # working directory is ${TORCH_DIR}\lib
  add_test(NAME "Test.${name}"
    COMMAND
    $<TARGET_FILE:${name}>
    WORKING_DIRECTORY ${TORCH_DIR}/lib
  )
 endfunction()
-if(kaldifeat_BUILD_TESTS)
+if(BUILD_TESTS)
  # please sort the source files alphabetically
  set(test_srcs
    feature-window-test.cc
    online-feature-test.cc
  )
  foreach(source IN LISTS test_srcs)
    kaldifeat_add_test(${source})
  endforeach()
 endif()
 file(MAKE_DIRECTORY
  DESTINATION
    ${PROJECT_BINARY_DIR}/include/kaldifeat/csrc
 )
 file(GLOB_RECURSE all_headers *.h)
 message(STATUS "All headers: ${all_headers}")
 file(COPY
  ${all_headers}
  DESTINATION
    ${PROJECT_BINARY_DIR}/include/kaldifeat/csrc
 )
 if(BUILD_SHARED_LIBS AND WIN32)
  install(TARGETS kaldifeat_core
    DESTINATION ../
  )
 endif()
 install(TARGETS kaldifeat_core
  DESTINATION ${CMAKE_INSTALL_LIBDIR}
 )
 install(FILES ${all_headers}
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/kaldifeat/csrc
 )
--- a/kaldifeat/csrc/CPPLINT.cfg
+++ b/kaldifeat/csrc/CPPLINT.cfg
@ -1 +0,0 @@
 exclude_files=whisper-mel-bank.h,whisper-v3-mel-bank.h
--- a/kaldifeat/csrc/feature-common-inl.h
+++ b/kaldifeat/csrc/feature-common-inl.h
@ -55,17 +55,10 @@ torch::Tensor OfflineFeatureTpl<F>::ComputeFeatures(const torch::Tensor &wave,
  int32_t padding = frame_opts.PaddedWindowSize() - strided_input.size(1);
  if (padding > 0) {
 #ifdef __ANDROID__
    auto padding_value = torch::zeros(
        {strided_input.size(0), padding},
        torch::dtype(torch::kFloat).device(strided_input.device()));
    strided_input = torch::cat({strided_input, padding_value}, 1);
 #else
    strided_input = torch::nn::functional::pad(
        strided_input, torch::nn::functional::PadFuncOptions({0, padding})
                           .mode(torch::kConstant)
                           .value(0));
 #endif
  }
  return computer_.Compute(log_energy_pre_window, vtln_warp, strided_input);
--- a/kaldifeat/csrc/feature-common.h
+++ b/kaldifeat/csrc/feature-common.h
@ -7,23 +7,7 @@
 #ifndef KALDIFEAT_CSRC_FEATURE_COMMON_H_
 #define KALDIFEAT_CSRC_FEATURE_COMMON_H_
 #include "kaldifeat/csrc/feature-functions.h"
 #include "kaldifeat/csrc/feature-window.h"
 // See  "The torch.fft module in PyTorch 1.7"
 // https://github.com/pytorch/pytorch/wiki/The-torch.fft-module-in-PyTorch-1.7
 #if KALDIFEAT_TORCH_VERSION_MAJOR > 1 || \
    (KALDIFEAT_TORCH_VERSION_MAJOR == 1 && KALDIFEAT_TORCH_VERSION_MINOR > 6)
 #include "torch/fft.h"
 #define KALDIFEAT_HAS_FFT_NAMESPACE
 // It uses torch::fft::rfft
 // Its input shape is [x, N], output shape is [x, N/2]
 // which is a complex tensor
 #else
 #include "ATen/Functions.h"
 // It uses torch::fft
 // Its input shape is [x, N], output shape is [x, N/2, 2]
 // which contains the real part [..., ], and imaginary part [..., 1]
 #endif
 namespace kaldifeat {
@ -34,7 +18,7 @@ class OfflineFeatureTpl {
  // Note: feature_window_function_ is the windowing function, which initialized
  // using the options class, that we cache at this level.
-  explicit OfflineFeatureTpl(const Options &opts)
+  OfflineFeatureTpl(const Options &opts)
      : computer_(opts),
        feature_window_function_(computer_.GetFrameOptions(), opts.device) {}
@ -62,10 +46,6 @@ class OfflineFeatureTpl {
  int32_t Dim() const { return computer_.Dim(); }
  const Options &GetOptions() const { return computer_.GetOptions(); }
  const FrameExtractionOptions &GetFrameOptions() const {
    return GetOptions().frame_opts;
  }
  // Copy constructor.
  OfflineFeatureTpl(const OfflineFeatureTpl<F> &) = delete;
  OfflineFeatureTpl<F> &operator=(const OfflineFeatureTpl<F> &) = delete;
--- a/kaldifeat/csrc/feature-fbank.cc
+++ b/kaldifeat/csrc/feature-fbank.cc
@ -8,6 +8,9 @@
 #include <cmath>
 #include "torch/fft.h"
 #include "torch/torch.h"
 namespace kaldifeat {
 std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
@ -63,29 +66,14 @@ torch::Tensor FbankComputer::Compute(torch::Tensor signal_raw_log_energy,
  }
  // note spectrum is in magnitude, not power, because of `abs()`
 #if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
  // signal_frame shape: [x, 512]
  // spectrum shape [x, 257]
  torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
 #else
  // signal_frame shape [x, 512]
  // real_imag shape [x, 257, 2],
  //   where [..., 0] is the real part
  //         [..., 1] is the imaginary part
  torch::Tensor real_imag = torch::rfft(signal_frame, 1);
  torch::Tensor real = real_imag.index({"...", 0});
  torch::Tensor imag = real_imag.index({"...", 1});
  torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
 #endif
  // remove the last column, i.e., the highest fft bin
  spectrum = spectrum.index(
      {"...", torch::indexing::Slice(0, -1, torch::indexing::None)});
  // Use power instead of magnitude if requested.
-  if (opts_.use_power) {
+  if (opts_.use_power) spectrum.pow_(2);
    spectrum = spectrum.pow(2);
  }
  torch::Tensor mel_energies = mel_banks.Compute(spectrum);
  if (opts_.use_log_fbank) {
--- a/kaldifeat/csrc/feature-fbank.h
+++ b/kaldifeat/csrc/feature-fbank.h
@ -13,6 +13,7 @@
 #include "kaldifeat/csrc/feature-common.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/mel-computations.h"
 #include "torch/torch.h"
 namespace kaldifeat {
@ -44,18 +45,20 @@ struct FbankOptions {
  std::string ToString() const {
    std::ostringstream os;
-    os << "FbankOptions(";
+    os << "frame_opts: \n";
    os << frame_opts << "\n";
    os << "\n";
-    os << "frame_opts=" << frame_opts.ToString() << ", ";
+    os << "mel_opts: \n";
-    os << "mel_opts=" << mel_opts.ToString() << ", ";
+    os << mel_opts << "\n";
-    os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
+    os << "use_energy: " << use_energy << "\n";
-    os << "energy_floor=" << energy_floor << ", ";
+    os << "energy_floor: " << energy_floor << "\n";
-    os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
+    os << "raw_energy: " << raw_energy << "\n";
-    os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
+    os << "htk_compat: " << htk_compat << "\n";
-    os << "use_log_fbank=" << (use_log_fbank ? "True" : "False") << ", ";
+    os << "use_log_fbank: " << use_log_fbank << "\n";
-    os << "use_power=" << (use_power ? "True" : "False") << ", ";
+    os << "use_power: " << use_power << "\n";
-    os << "device=\"" << device << "\")";
+    os << "device: " << device << "\n";
    return os.str();
  }
 };
--- a/kaldifeat/csrc/feature-functions.cc
+++ b/kaldifeat/csrc/feature-functions.cc
@ -1,33 +0,0 @@
 // kaldifeat/csrc/feature-functions.cc
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/feature-functions.cc
 #include "kaldifeat/csrc/feature-functions.h"
 #include <cmath>
 namespace kaldifeat {
 void InitIdftBases(int32_t n_bases, int32_t dimension, torch::Tensor *mat_out) {
  float angle = M_PI / (dimension - 1);
  float scale = 1.0f / (2 * (dimension - 1));
  *mat_out = torch::empty({n_bases, dimension}, torch::kFloat);
  float *data = mat_out->data_ptr<float>();
  int32_t stride = mat_out->stride(0);
  for (int32_t i = 0; i < n_bases; ++i) {
    float *this_row = data + i * stride;
    this_row[0] = scale;
    for (int32_t j = 1; j < dimension - 1; ++j) {
      this_row[j] = 2 * scale * std::cos(angle * i * j);
    }
    this_row[dimension - 1] = scale * std::cos(angle * i * (dimension - 1));
  }
 }
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/feature-functions.h
+++ b/kaldifeat/csrc/feature-functions.h
@ -1,18 +0,0 @@
 // kaldifeat/csrc/feature-functions.h
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/feature-functions.h
 #ifndef KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_
 #define KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_
 #include "torch/script.h"
 namespace kaldifeat {
 void InitIdftBases(int32_t n_bases, int32_t dimension, torch::Tensor *mat_out);
 }
 #endif  // KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_
--- a/kaldifeat/csrc/feature-mfcc.cc
+++ b/kaldifeat/csrc/feature-mfcc.cc
@ -31,7 +31,7 @@ MfccComputer::MfccComputer(const MfccOptions &opts) : opts_(opts) {
  // energy we replace this with the energy.  This means a different
  // ordering of features than HTK.
-  using namespace torch::indexing;  // It imports: Slice, None  // NOLINT
+  using namespace torch::indexing;  // It imports: Slice, None
  // dct_matrix[:opts.num_cepts, :]
  torch::Tensor dct_rows =
@ -91,27 +91,14 @@ torch::Tensor MfccComputer::Compute(torch::Tensor signal_raw_log_energy,
  }
  // note spectrum is in magnitude, not power, because of `abs()`
 #if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
  // signal_frame shape: [x, 512]
  // spectrum shape [x, 257
  torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
 #else
  // signal_frame shape [x, 512]
  // real_imag shape [x, 257, 2],
  //   where [..., 0] is the real part
  //         [..., 1] is the imaginary part
  torch::Tensor real_imag = torch::rfft(signal_frame, 1);
  torch::Tensor real = real_imag.index({"...", 0});
  torch::Tensor imag = real_imag.index({"...", 1});
  torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
 #endif
  // remove the last column, i.e., the highest fft bin
  spectrum = spectrum.index(
      {"...", torch::indexing::Slice(0, -1, torch::indexing::None)});
  // Use power instead of magnitude
-  spectrum = spectrum.pow(2);
+  spectrum.pow_(2);
  torch::Tensor mel_energies = mel_banks.Compute(spectrum);
--- a/kaldifeat/csrc/feature-mfcc.h
+++ b/kaldifeat/csrc/feature-mfcc.h
@ -7,13 +7,10 @@
 #ifndef KALDIFEAT_CSRC_FEATURE_MFCC_H_
 #define KALDIFEAT_CSRC_FEATURE_MFCC_H_
 #include <map>
 #include <string>
 #include "kaldifeat/csrc/feature-common.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/mel-computations.h"
-#include "torch/script.h"
+#include "torch/torch.h"
 namespace kaldifeat {
@ -53,18 +50,20 @@ struct MfccOptions {
  std::string ToString() const {
    std::ostringstream os;
-    os << "MfccOptions(";
+    os << "frame_opts: \n";
-    os << "frame_opts=" << frame_opts.ToString() << ", ";
+    os << frame_opts << "\n";
-    os << "mel_opts=" << mel_opts.ToString() << ", ";
+    os << "\n";
-    os << "num_ceps=" << num_ceps << ", ";
+    os << "mel_opts: \n";
-    os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
+    os << mel_opts << "\n";
    os << "energy_floor=" << energy_floor << ", ";
    os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
    os << "cepstral_lifter=" << cepstral_lifter << ", ";
    os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
    os << "device=\"" << device << "\")";
    os << "num_ceps: " << num_ceps << "\n";
    os << "use_energy: " << use_energy << "\n";
    os << "energy_floor: " << energy_floor << "\n";
    os << "raw_energy: " << raw_energy << "\n";
    os << "cepstral_lifter: " << cepstral_lifter << "\n";
    os << "htk_compat: " << htk_compat << "\n";
    os << "device: " << device << "\n";
    return os.str();
  }
 };
--- a/kaldifeat/csrc/feature-plp.cc
+++ b/kaldifeat/csrc/feature-plp.cc
@ -1,185 +0,0 @@
 // kaldifeat/csrc/feature-plp.cc
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/feature-plp.cc
 #include "kaldifeat/csrc/feature-plp.h"
 #include "kaldifeat/csrc/feature-functions.h"
 namespace kaldifeat {
 std::ostream &operator<<(std::ostream &os, const PlpOptions &opts) {
  os << opts.ToString();
  return os;
 }
 PlpComputer::PlpComputer(const PlpOptions &opts) : opts_(opts) {
  // our num-ceps includes C0.
  KALDIFEAT_ASSERT(opts_.num_ceps <= opts_.lpc_order + 1);
  if (opts.cepstral_lifter != 0.0) {
    lifter_coeffs_ = torch::empty({1, opts.num_ceps}, torch::kFloat32);
    ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_);
    lifter_coeffs_ = lifter_coeffs_.to(opts.device);
  }
  InitIdftBases(opts_.lpc_order + 1, opts_.mel_opts.num_bins + 2, &idft_bases_);
  // CAUTION: we save a transposed version of idft_bases_
  idft_bases_ = idft_bases_.to(opts.device).t();
  if (opts.energy_floor > 0.0) log_energy_floor_ = logf(opts.energy_floor);
  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  // [note: this call caches it.]
  GetMelBanks(1.0);
 }
 PlpComputer::~PlpComputer() {
  for (auto iter = mel_banks_.begin(); iter != mel_banks_.end(); ++iter)
    delete iter->second;
  for (auto iter = equal_loudness_.begin(); iter != equal_loudness_.end();
       ++iter)
    delete iter->second;
 }
 const MelBanks *PlpComputer::GetMelBanks(float vtln_warp) {
  MelBanks *this_mel_banks = nullptr;
  // std::map<float, MelBanks *>::iterator iter = mel_banks_.find(vtln_warp);
  auto iter = mel_banks_.find(vtln_warp);
  if (iter == mel_banks_.end()) {
    this_mel_banks =
        new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp, opts_.device);
    mel_banks_[vtln_warp] = this_mel_banks;
  } else {
    this_mel_banks = iter->second;
  }
  return this_mel_banks;
 }
 const torch::Tensor *PlpComputer::GetEqualLoudness(float vtln_warp) {
  const MelBanks *this_mel_banks = GetMelBanks(vtln_warp);
  torch::Tensor *ans = NULL;
  auto iter = equal_loudness_.find(vtln_warp);
  if (iter == equal_loudness_.end()) {
    ans = new torch::Tensor;
    GetEqualLoudnessVector(*this_mel_banks, ans);
    *ans = ans->to(opts_.device);
    equal_loudness_[vtln_warp] = ans;
  } else {
    ans = iter->second;
  }
  return ans;
 }
 // ans.shape [signal_frame.size(0), this->Dim()]
 torch::Tensor PlpComputer::Compute(torch::Tensor signal_raw_log_energy,
                                   float vtln_warp,
                                   const torch::Tensor &signal_frame) {
  KALDIFEAT_ASSERT(signal_frame.dim() == 2);
  KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize());
  const MelBanks &mel_banks = *GetMelBanks(vtln_warp);
  const torch::Tensor &equal_loudness = *GetEqualLoudness(vtln_warp);
  // torch.finfo(torch.float32).eps
  constexpr float kEps = 1.1920928955078125e-07f;
  // Compute energy after window function (not the raw one).
  if (opts_.use_energy && !opts_.raw_energy) {
    signal_raw_log_energy =
        torch::clamp_min(signal_frame.pow(2).sum(1), kEps).log();
  }
  // note spectrum is in magnitude, not power, because of `abs()`
 #if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
  // signal_frame shape: [x, 512]
  // spectrum shape [x, 257
  torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
 #else
  // signal_frame shape [x, 512]
  // real_imag shape [x, 257, 2],
  //   where [..., 0] is the real part
  //         [..., 1] is the imaginary part
  torch::Tensor real_imag = torch::rfft(signal_frame, 1);
  torch::Tensor real = real_imag.index({"...", 0});
  torch::Tensor imag = real_imag.index({"...", 1});
  torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
 #endif
  // remove the last column, i.e., the highest fft bin
  spectrum = spectrum.index(
      {"...", torch::indexing::Slice(0, -1, torch::indexing::None)});
  // Use power instead of magnitude
  spectrum = spectrum.pow(2);
  torch::Tensor mel_energies = mel_banks.Compute(spectrum);
  mel_energies = torch::mul(mel_energies, equal_loudness);
  mel_energies = mel_energies.pow(opts_.compress_factor);
  // duplicate first and last elements
  //
  // left_padding = wave[:num_left_padding].flip(dims=(0,))
  // first = mel_energies[:, 0]
  // first.shape [num_frames, 1]
  torch::Tensor first = mel_energies.index({"...", 0}).unsqueeze(-1);
  // last = mel_energies[:, -1]
  // last.shape [num_frames, 1]
  torch::Tensor last = mel_energies.index({"...", -1}).unsqueeze(-1);
  mel_energies = torch::cat({first, mel_energies, last}, 1);
  torch::Tensor autocorr_coeffs = torch::mm(mel_energies, idft_bases_);
  torch::Tensor lpc_coeffs;
  torch::Tensor residual_log_energy = ComputeLpc(autocorr_coeffs, &lpc_coeffs);
  residual_log_energy = torch::clamp_min(residual_log_energy, kEps);
  torch::Tensor raw_cepstrum = Lpc2Cepstrum(lpc_coeffs);
  // torch.cat((residual_log_energy.unsqueeze(-1),
  // raw_cepstrum[:opts.num_ceps-1]), 1)
  //
  using namespace torch::indexing;  // It imports: Slice, None // NOLINT
  torch::Tensor features = torch::cat(
      {residual_log_energy.unsqueeze(-1),
       raw_cepstrum.index({"...", Slice(0, opts_.num_ceps - 1, None)})},
      1);
  if (opts_.cepstral_lifter != 0.0) {
    features = torch::mul(features, lifter_coeffs_);
  }
  if (opts_.cepstral_scale != 1.0) {
    features = features * opts_.cepstral_scale;
  }
  if (opts_.use_energy) {
    if (opts_.energy_floor > 0.0f) {
      signal_raw_log_energy =
          torch::clamp_min(signal_raw_log_energy, log_energy_floor_);
    }
    // column 0 is replaced by signal_raw_log_energy
    //
    // features[:, 0] = signal_raw_log_energy
    //
    features.index({"...", 0}) = signal_raw_log_energy;
  }
  if (opts_.htk_compat) {  // reorder the features.
    // shift left, so the original 0th column
    // becomes the last column;
    // the original first column becomes the 0th column
    features = torch::roll(features, -1, 1);
  }
  return features;
 }
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/feature-plp.h
+++ b/kaldifeat/csrc/feature-plp.h
@ -1,129 +0,0 @@
 // kaldifeat/csrc/feature-plp.h
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/feature-plp.h
 #ifndef KALDIFEAT_CSRC_FEATURE_PLP_H_
 #define KALDIFEAT_CSRC_FEATURE_PLP_H_
 #include <map>
 #include <string>
 #include "kaldifeat/csrc/feature-common.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/mel-computations.h"
 #include "torch/script.h"
 namespace kaldifeat {
 /// PlpOptions contains basic options for computing PLP features.
 /// It only includes things that can be done in a "stateless" way, i.e.
 /// it does not include energy max-normalization.
 /// It does not include delta computation.
 struct PlpOptions {
  FrameExtractionOptions frame_opts;
  MelBanksOptions mel_opts;
  // Order of LPC analysis in PLP computation
  //
  // 12 seems to be common for 16kHz-sampled data. For 8kHz-sampled
  // data, 15 may be better.
  int32_t lpc_order = 12;
  // Number of cepstra in PLP computation (including C0)
  int32_t num_ceps = 13;
  bool use_energy = true;  // use energy; else C0
  // Floor on energy (absolute, not relative) in PLP computation.
  // Only makes a difference if --use-energy=true; only necessary if
  // dither is 0.0.  Suggested values: 0.1 or 1.0
  float energy_floor = 0.0;
  // If true, compute energy before preemphasis and windowing
  bool raw_energy = true;
  // Compression factor in PLP computation
  float compress_factor = 0.33333;
  // Constant that controls scaling of PLPs
  int32_t cepstral_lifter = 22;
  // Scaling constant in PLP computation
  float cepstral_scale = 1.0;
  bool htk_compat = false;  // if true, put energy/C0 last and introduce a
                            // factor of sqrt(2) on C0 to be the same as HTK.
                            //
  torch::Device device{"cpu"};
  PlpOptions() { mel_opts.num_bins = 23; }
  std::string ToString() const {
    std::ostringstream os;
    os << "PlpOptions(";
    os << "frame_opts=" << frame_opts.ToString() << ", ";
    os << "mel_opts=" << mel_opts.ToString() << ", ";
    os << "lpc_order=" << lpc_order << ", ";
    os << "num_ceps=" << num_ceps << ", ";
    os << "use_energy=" << (use_energy ? "True" : "False") << ", ";
    os << "energy_floor=" << energy_floor << ", ";
    os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
    os << "compress_factor=" << compress_factor << ", ";
    os << "cepstral_lifter=" << cepstral_lifter << ", ";
    os << "cepstral_scale=" << cepstral_scale << ", ";
    os << "htk_compat=" << (htk_compat ? "True" : "False") << ", ";
    os << "device=\"" << device << "\")";
    return os.str();
  }
 };
 std::ostream &operator<<(std::ostream &os, const PlpOptions &opts);
 class PlpComputer {
 public:
  using Options = PlpOptions;
  explicit PlpComputer(const PlpOptions &opts);
  ~PlpComputer();
  PlpComputer &operator=(const PlpComputer &) = delete;
  PlpComputer(const PlpComputer &) = delete;
  int32_t Dim() const { return opts_.num_ceps; }
  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
  const FrameExtractionOptions &GetFrameOptions() const {
    return opts_.frame_opts;
  }
  const PlpOptions &GetOptions() const { return opts_; }
  // signal_raw_log_energy is log_energy_pre_window, which is not empty
  // iff NeedRawLogEnergy() returns true.
  torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
                        const torch::Tensor &signal_frame);
 private:
  const MelBanks *GetMelBanks(float vtln_warp);
  const torch::Tensor *GetEqualLoudness(float vtln_warp);
  PlpOptions opts_;
  torch::Tensor lifter_coeffs_;
  torch::Tensor idft_bases_;  // 2-D tensor, kFloat. Caution: it is transposed
  float log_energy_floor_;
  std::map<float, MelBanks *> mel_banks_;  // float is VTLN coefficient.
  // value is a 1-D torch.Tensor
  std::map<float, torch::Tensor *> equal_loudness_;
 };
 using Plp = OfflineFeatureTpl<PlpComputer>;
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_FEATURE_PLP_H_
--- a/kaldifeat/csrc/feature-spectrogram.cc
+++ b/kaldifeat/csrc/feature-spectrogram.cc
@ -1,78 +0,0 @@
 // kaldifeat/csrc/feature-spectrogram.cc
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/feature-spectrogram.cc
 #include "kaldifeat/csrc/feature-spectrogram.h"
 namespace kaldifeat {
 std::ostream &operator<<(std::ostream &os, const SpectrogramOptions &opts) {
  os << opts.ToString();
  return os;
 }
 SpectrogramComputer::SpectrogramComputer(const SpectrogramOptions &opts)
    : opts_(opts) {
  if (opts.energy_floor > 0.0) log_energy_floor_ = logf(opts.energy_floor);
 }
 // ans.shape [signal_frame.size(0), this->Dim()]
 torch::Tensor SpectrogramComputer::Compute(torch::Tensor signal_raw_log_energy,
                                           float vtln_warp,
                                           const torch::Tensor &signal_frame) {
  KALDIFEAT_ASSERT(signal_frame.dim() == 2);
  KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize());
  // torch.finfo(torch.float32).eps
  constexpr float kEps = 1.1920928955078125e-07f;
  // Compute energy after window function (not the raw one).
  if (!opts_.raw_energy) {
    signal_raw_log_energy =
        torch::clamp_min(signal_frame.pow(2).sum(1), kEps).log();
  }
  // note spectrum is in magnitude, not power, because of `abs()`
 #if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
  // signal_frame shape: [x, 512]
  // spectrum shape [x, 257
  torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs();
 #else
  // signal_frame shape [x, 512]
  // real_imag shape [x, 257, 2],
  //   where [..., 0] is the real part
  //         [..., 1] is the imaginary part
  torch::Tensor real_imag = torch::rfft(signal_frame, 1);
  torch::Tensor real = real_imag.index({"...", 0});
  torch::Tensor imag = real_imag.index({"...", 1});
  torch::Tensor spectrum = (real.square() + imag.square()).sqrt();
 #endif
  if (opts_.return_raw_fft) {
    KALDIFEAT_ERR << "return raw fft is not supported yet";
  }
  // compute power spectrum
  spectrum = spectrum.pow(2);
  // NOTE: take the log
  spectrum = torch::clamp_min(spectrum, kEps).log();
  if (opts_.energy_floor > 0.0f) {
    signal_raw_log_energy =
        torch::clamp_min(signal_raw_log_energy, log_energy_floor_);
  }
  // The zeroth spectrogram component is always set to the signal energy,
  // instead of the square of the constant component of the signal.
  //
  // spectrum[:,0] = signal_raw_log_energy
  spectrum.index({"...", 0}) = signal_raw_log_energy;
  return spectrum;
 }
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/feature-spectrogram.h
+++ b/kaldifeat/csrc/feature-spectrogram.h
@ -1,92 +0,0 @@
 // kaldifeat/csrc/feature-spectrogram.h
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/feature-spectrogram.h
 #ifndef KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_
 #define KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_
 #include <string>
 #include "kaldifeat/csrc/feature-common.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "torch/script.h"
 namespace kaldifeat {
 struct SpectrogramOptions {
  FrameExtractionOptions frame_opts;
  // Floor on energy (absolute, not relative) in Spectrogram
  // computation.  Caution: this floor is applied to the
  // zeroth component, representing the total signal energy.
  // The floor on the individual spectrogram elements is fixed at
  // std::numeric_limits<float>::epsilon()
  float energy_floor = 0.0;
  // If true, compute energy before preemphasis and windowing
  bool raw_energy = true;
  // If true, return raw FFT complex numbers instead of log magnitudes
  // Not implemented yet
  bool return_raw_fft = false;
  torch::Device device{"cpu"};
  std::string ToString() const {
    std::ostringstream os;
    os << "SpectrogramOptions(";
    os << "frame_opts=" << frame_opts.ToString() << ", ";
    os << "energy_floor=" << energy_floor << ", ";
    os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
    os << "return_raw_fft=" << (return_raw_fft ? "True" : "False") << ", ";
    os << "device=\"" << device << "\")";
    return os.str();
  }
 };
 std::ostream &operator<<(std::ostream &os, const SpectrogramOptions &opts);
 class SpectrogramComputer {
 public:
  using Options = SpectrogramOptions;
  explicit SpectrogramComputer(const SpectrogramOptions &opts);
  ~SpectrogramComputer() = default;
  const FrameExtractionOptions &GetFrameOptions() const {
    return opts_.frame_opts;
  }
  const SpectrogramOptions &GetOptions() const { return opts_; }
  int32_t Dim() const {
    if (opts_.return_raw_fft) {
      return opts_.frame_opts.PaddedWindowSize();
    } else {
      return opts_.frame_opts.PaddedWindowSize() / 2 + 1;
    }
  }
  bool NeedRawLogEnergy() const { return opts_.raw_energy; }
  // signal_raw_log_energy is log_energy_pre_window, which is not empty
  // iff NeedRawLogEnergy() returns true.
  //
  // vtln_warp is ignored by this function, it's only
  // needed for interface compatibility.
  torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
                        const torch::Tensor &signal_frame);
 private:
  SpectrogramOptions opts_;
  float log_energy_floor_;
 };
 using Spectrogram = OfflineFeatureTpl<SpectrogramComputer>;
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_
--- a/kaldifeat/csrc/feature-window.cc
+++ b/kaldifeat/csrc/feature-window.cc
@ -7,7 +7,8 @@
 #include "kaldifeat/csrc/feature-window.h"
 #include <cmath>
-#include <vector>
+
 #include "torch/torch.h"
 #ifndef M_2PI
 #define M_2PI 6.283185307179586476925286766559005
@ -29,13 +30,6 @@ FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts,
  float *window_data = window.data_ptr<float>();
  double a = M_2PI / (frame_length - 1);
  if (opts.window_type == "hann") {
    // see https://pytorch.org/docs/stable/generated/torch.hann_window.html
    // We assume periodic is true
    a = M_2PI / frame_length;
  }
  for (int32_t i = 0; i < frame_length; i++) {
    double i_fl = static_cast<double>(i);
    if (opts.window_type == "hanning") {
@ -46,8 +40,6 @@ FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts,
      window_data[i] = sin(0.5 * a * i_fl);
    } else if (opts.window_type == "hamming") {
      window_data[i] = 0.54 - 0.46 * cos(a * i_fl);
    } else if (opts.window_type == "hann") {
      window_data[i] = 0.50 - 0.50 * cos(a * i_fl);
    } else if (opts.window_type ==
               "povey") {  // like hamming but goes to zero at edges.
      window_data[i] = pow(0.5 - 0.5 * cos(a * i_fl), 0.85);
@ -164,26 +156,25 @@ torch::Tensor GetStrided(const torch::Tensor &wave,
 }
 // Adds Gaussian noise to `wave`.
 //
 // @param wave          Input samples; it is never modified.
 // @param dither_value  Scale applied to the standard-normal noise.
 // @return `wave` itself when dither_value is exactly 0, otherwise a new
 //         tensor equal to wave + randn_like(wave) * dither_value.
 torch::Tensor Dither(const torch::Tensor &wave, float dither_value) {
  // Zero dither is a no-op. The `return` is essential: without it the
  // statement is a discarded expression and noise is generated anyway.
  if (dither_value == 0.0f) return wave;
  torch::Tensor rand_gauss = torch::randn_like(wave);
 #if 1
  return wave + rand_gauss * dither_value;
 #else
  // use in-place version of wave and change it to pointer type
  wave_->add_(rand_gauss, dither_value);
 #endif
 }
 torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave) {
  using namespace torch::indexing;  // It imports: Slice, None
  if (preemph_coeff == 0.0f) return wave;
  KALDIFEAT_ASSERT(preemph_coeff >= 0.0f && preemph_coeff <= 1.0f);
  torch::Tensor ans = torch::empty_like(wave);
  using torch::indexing::None;
  using torch::indexing::Slice;
  // right = wave[:, 1:]
  torch::Tensor right = wave.index({"...", Slice(1, None, None)});
@ -198,59 +189,4 @@ torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave) {
  return ans;
 }
 // Returns the `frame_length` raw samples that make up frame `f`.
 //
 // `wave` holds the samples starting at absolute position `sample_offset`.
 // When the frame lies completely inside `wave` a slice of `wave` is returned;
 // otherwise a new float tensor is filled sample by sample, mirroring indexes
 // that fall before the start or after the end of `wave`.
 torch::Tensor ExtractWindow(int64_t sample_offset, const torch::Tensor &wave,
                            int32_t f, const FrameExtractionOptions &opts) {
  KALDIFEAT_ASSERT(sample_offset >= 0 && wave.numel() != 0);
  const int32_t frame_length = opts.WindowSize();
  const int64_t total_samples = sample_offset + wave.numel();
  const int64_t frame_begin = FirstSampleOfFrame(f, opts);
  const int64_t frame_end = frame_begin + frame_length;
  if (!opts.snip_edges) {
    KALDIFEAT_ASSERT(sample_offset == 0 || frame_begin >= sample_offset);
  } else {
    KALDIFEAT_ASSERT(frame_begin >= sample_offset &&
                     frame_end <= total_samples);
  }
  // Translate the absolute frame position into indexes local to 'wave'.
  const int32_t local_begin = static_cast<int32_t>(frame_begin - sample_offset);
  const int32_t local_end = local_begin + frame_length;
  const bool fully_inside = local_begin >= 0 && local_end <= wave.numel();
  if (fully_inside) {
    // The normal case, no edge effects: wave[local_begin:local_end]
    return wave.index({torch::indexing::Slice(local_begin, local_end)});
  }
  // Edge case: build the frame by reflection. This is reached for only about
  // two frames per utterance, so efficiency is not a concern here.
  torch::Tensor window = torch::empty({frame_length}, torch::kFloat);
  auto dst = window.accessor<float, 1>();
  auto src = wave.accessor<float, 1>();
  const int32_t wave_dim = wave.numel();
  for (int32_t s = 0; s != frame_length; ++s) {
    int32_t pos = local_begin + s;
    // Reflect around the beginning or the end of the wave until the index is
    // valid, e.g. -1 -> 0, -2 -> 1, dim -> dim - 1, dim + 1 -> dim - 2.
    // Repeated reflections are only needed in pathological cases.
    while (pos < 0 || pos >= wave_dim) {
      pos = (pos < 0) ? (-pos - 1) : (2 * wave_dim - 1 - pos);
    }
    dst[s] = src[pos];
  }
  return window;
 }
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/feature-window.h
+++ b/kaldifeat/csrc/feature-window.h
@ -4,11 +4,8 @@
 // This file is copied/modified from kaldi/src/feat/feature-window.h
 #include <string>
 #include "kaldifeat/csrc/log.h"
-#include "torch/all.h"
+#include "torch/torch.h"
 #include "torch/script.h"
 #ifndef KALDIFEAT_CSRC_FEATURE_WINDOW_H_
 #define KALDIFEAT_CSRC_FEATURE_WINDOW_H_
@ -44,11 +41,7 @@ struct FrameExtractionOptions {
  bool snip_edges = true;
  // bool allow_downsample = false;
  // bool allow_upsample = false;
-
+  // int32_t max_feature_vectors = -1;
  // Used for streaming feature extraction. It indicates the number
  // of feature frames to keep in the recycling vector. -1 means to
  // keep all feature frames.
  int32_t max_feature_vectors = -1;
  int32_t WindowShift() const {
    return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
@ -62,20 +55,21 @@ struct FrameExtractionOptions {
  }
  // Serializes the frame extraction options into a string.
  std::string ToString() const {
    std::ostringstream os;
-    os << "FrameExtractionOptions(";
    // KALDIFEAT_PRINT(x) streams the field name (stringized with #x), then
    // ": ", the field value and a newline. It is undefined again before return.
+#define KALDIFEAT_PRINT(x) os << #x << ": " << x << "\n"
-    os << "samp_freq=" << samp_freq << ", ";
+    KALDIFEAT_PRINT(samp_freq);
-    os << "frame_shift_ms=" << frame_shift_ms << ", ";
+    KALDIFEAT_PRINT(frame_shift_ms);
-    os << "frame_length_ms=" << frame_length_ms << ", ";
+    KALDIFEAT_PRINT(frame_length_ms);
-    os << "dither=" << dither << ", ";
+    KALDIFEAT_PRINT(dither);
-    os << "preemph_coeff=" << preemph_coeff << ", ";
+    KALDIFEAT_PRINT(preemph_coeff);
-    os << "remove_dc_offset=" << (remove_dc_offset ? "True" : "False") << ", ";
+    KALDIFEAT_PRINT(remove_dc_offset);
-    os << "window_type=" << '"' << window_type << '"' << ", ";
+    KALDIFEAT_PRINT(window_type);
-    os << "round_to_power_of_two=" << (round_to_power_of_two ? "True" : "False")
+    KALDIFEAT_PRINT(round_to_power_of_two);
-       << ", ";
+    KALDIFEAT_PRINT(blackman_coeff);
-    os << "blackman_coeff=" << blackman_coeff << ", ";
+    KALDIFEAT_PRINT(snip_edges);
-    os << "snip_edges=" << (snip_edges ? "True" : "False") << ", ";
+    // KALDIFEAT_PRINT(allow_downsample);
-    os << "max_feature_vectors=" << max_feature_vectors << ")";
+    // KALDIFEAT_PRINT(allow_upsample);
-
+    // KALDIFEAT_PRINT(max_feature_vectors);
 #undef KALDIFEAT_PRINT
    return os.str();
  }
 };
@ -103,11 +97,11 @@ class FeatureWindowFunction {
      @param [in] flush   True if we are asserting that this number of samples
   is 'all there is', false if we expecting more data to possibly come in.  This
-   only makes a difference to the answer
+   only makes a difference to the answer if opts.snip_edges
-   if opts.snips_edges== false.  For offline feature extraction you always want
+             == false.  For offline feature extraction you always want flush ==
-   flush == true.  In an online-decoding context, once you know (or decide) that
+             true.  In an online-decoding context, once you know (or decide)
-   no more data is coming in, you'd call it with flush == true at the end to
+   that no more data is coming in, you'd call it with flush == true at the end
-   flush out any remaining data.
+   to flush out any remaining data.
 */
 int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
                  bool flush = true);
@ -136,29 +130,6 @@ torch::Tensor Dither(const torch::Tensor &wave, float dither_value);
 torch::Tensor Preemphasize(float preemph_coeff, const torch::Tensor &wave);
 /*
  ExtractWindow() extracts "frame_length" samples from the given waveform.
  Note: This function only extracts "frame_length" samples
  from the input waveform, without any further processing.
  @param [in] sample_offset  If 'wave' is not the entire waveform, but
                   part of it to the left has been discarded, then the
                   number of samples prior to 'wave' that we have
                   already discarded.  Set this to zero if you are
                   processing the entire waveform in one piece, or
                   if you get 'no matching function' compilation
                   errors when updating the code.
  @param [in] wave  The waveform. It is read as a 1-D float tensor.
  @param [in] f     The frame index to be extracted, with
                    0 <= f < NumFrames(sample_offset + wave.numel(), opts, true)
  @param [in] opts  The options class to be used
  @return  Return a 1-D tensor containing "frame_length" samples extracted
           from `wave`, without any further processing. Its shape is
           (frame_length,); a caller that needs a (1, frame_length) batch
           has to add the leading dimension itself (e.g. with unsqueeze(0)).
 */
 torch::Tensor ExtractWindow(int64_t sample_offset, const torch::Tensor &wave,
                            int32_t f, const FrameExtractionOptions &opts);
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_FEATURE_WINDOW_H_
--- a/kaldifeat/csrc/generate-whisper-melbank-v3.py
+++ b/kaldifeat/csrc/generate-whisper-melbank-v3.py
@ -1,39 +0,0 @@
 #!/usr/bin/env python3
 # Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
 import librosa
 import numpy as np
 def main():
    """Generate ``whisper-v3-mel-bank.h`` in the current directory.

    The 128 x 201 mel filterbank is obtained from ``librosa.filters.mel``
    (sr=16000, n_fft=400), flattened with ``reshape(-1)`` and written as a
    ``constexpr float`` array together with its row and column counts.
    """
    mel = librosa.filters.mel(sr=16000, n_fft=400, n_mels=128)
    assert mel.shape == (128, 201)
    pieces = [
        "// Auto-generated. Do NOT edit!\n\n",
        "// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)\n\n",
        "\n",
        "#ifndef KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n",
        "#define KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n",
        "namespace kaldifeat {\n\n",
        f"constexpr int32_t kWhisperV3MelRows = {mel.shape[0]};\n",
        f"constexpr int32_t kWhisperV3MelCols = {mel.shape[1]};\n",
        "\n",
        "constexpr float kWhisperV3MelArray[] = {\n",
    ]
    # Values are comma separated. A line break is emitted after every index
    # that is a non-zero multiple of 7, so the first line holds 8 values.
    prefix = ""
    for idx, value in enumerate(mel.reshape(-1).tolist()):
        pieces.append(f"{prefix}{value:.8f}")
        if idx and idx % 7 == 0:
            pieces.append(",\n")
            prefix = ""
        else:
            prefix = ", "
    pieces.append("};\n\n")
    pieces.append("}  // namespace kaldifeat\n\n")
    pieces.append("#endif  // KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n")
    with open("whisper-v3-mel-bank.h", "w") as out:
        out.write("".join(pieces))
 if __name__ == "__main__":
    main()
--- a/kaldifeat/csrc/generate-whisper-melbank.py
+++ b/kaldifeat/csrc/generate-whisper-melbank.py
@ -1,39 +0,0 @@
 #!/usr/bin/env python3
 # Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
 import librosa
 import numpy as np
 def main():
    """Generate ``whisper-mel-bank.h`` in the current directory.

    The 80 x 201 mel filterbank is obtained from ``librosa.filters.mel``
    (sr=16000, n_fft=400), flattened with ``reshape(-1)`` and written as a
    ``constexpr float`` array together with its row and column counts.
    """
    mel = librosa.filters.mel(sr=16000, n_fft=400, n_mels=80)
    assert mel.shape == (80, 201)
    pieces = [
        "// Auto-generated. Do NOT edit!\n\n",
        "// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)\n\n",
        "\n",
        "#ifndef KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n",
        "#define KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n",
        "namespace kaldifeat {\n\n",
        f"constexpr int32_t kWhisperMelRows = {mel.shape[0]};\n",
        f"constexpr int32_t kWhisperMelCols = {mel.shape[1]};\n",
        "\n",
        "constexpr float kWhisperMelArray[] = {\n",
    ]
    # Values are comma separated. A line break is emitted after every index
    # that is a non-zero multiple of 7, so the first line holds 8 values.
    prefix = ""
    for idx, value in enumerate(mel.reshape(-1).tolist()):
        pieces.append(f"{prefix}{value:.8f}")
        if idx and idx % 7 == 0:
            pieces.append(",\n")
            prefix = ""
        else:
            prefix = ", "
    pieces.append("};\n\n")
    pieces.append("}  // namespace kaldifeat\n\n")
    pieces.append("#endif  // KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n")
    with open("whisper-mel-bank.h", "w") as out:
        out.write("".join(pieces))
 if __name__ == "__main__":
    main()
--- a/kaldifeat/csrc/log.h
+++ b/kaldifeat/csrc/log.h
@ -5,7 +5,6 @@
 #ifndef KALDIFEAT_CSRC_LOG_H_
 #define KALDIFEAT_CSRC_LOG_H_
 #include <cstdint>
 #include <cstdlib>
 #include <iostream>
 #include <sstream>
--- a/kaldifeat/csrc/matrix-functions.h
+++ b/kaldifeat/csrc/matrix-functions.h
@ -7,7 +7,7 @@
 #ifndef KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_
 #define KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_
-#include "torch/script.h"
+#include "torch/torch.h"
 namespace kaldifeat {
--- a/kaldifeat/csrc/mel-computations.cc
+++ b/kaldifeat/csrc/mel-computations.cc
@ -3,11 +3,9 @@
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 //
 // This file is copied/modified from kaldi/src/feat/mel-computations.cc
-
+//
 #include "kaldifeat/csrc/mel-computations.h"
 #include <algorithm>
 #include "kaldifeat/csrc/feature-window.h"
 namespace kaldifeat {
@ -138,14 +136,9 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
                  << " and vtln-high " << vtln_high << ", versus "
                  << "low-freq " << low_freq << " and high-freq " << high_freq;
  // we will transpose bins_mat_ at the end of this function
  bins_mat_ = torch::zeros({num_bins, num_fft_bins}, torch::kFloat);
  int32_t stride = bins_mat_.strides()[0];
  center_freqs_ = torch::empty({num_bins}, torch::kFloat);
  float *center_freqs_data = center_freqs_.data_ptr<float>();
  for (int32_t bin = 0; bin < num_bins; ++bin) {
    float left_mel = mel_low_freq + bin * mel_freq_delta,
          center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
@ -159,7 +152,6 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
      right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                  vtln_warp_factor, right_mel);
    }
    center_freqs_data[bin] = InverseMelScale(center_mel);
    // this_bin will be a vector of coefficients that is only
    // nonzero where this mel bin is active.
    float *this_bin = bins_mat_.data_ptr<float>() + bin * stride;
@ -179,14 +171,12 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
        last_index = i;
      }
    }
-
+    KALDIFEAT_ASSERT(first_index != -1 && last_index >= first_index &&
-    // Note: It is possible that first_index == last_index == -1 at this line.
+                     "You may have set num_mel_bins too large.");
    // Replicate a bug in HTK, for testing purposes.
-    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f &&
+    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f)
        first_index != -1) {
      this_bin[first_index] = 0.0f;
    }
  }
  if (debug_) KALDIFEAT_LOG << bins_mat_;
@ -198,15 +188,6 @@ MelBanks::MelBanks(const MelBanksOptions &opts,
  }
 }
 // Builds the filterbank from an existing num_rows x num_cols weight matrix
 // instead of computing it. The matrix is wrapped as a float tensor,
 // transposed and moved to `device`. center_freqs_ is not set here.
 MelBanks::MelBanks(const float *weights, int32_t num_rows, int32_t num_cols,
                   torch::Device device)
    : debug_(false), htk_mode_(false) {
  // from_blob() takes a non-const pointer, hence the const_cast.
  float *raw = const_cast<float *>(weights);
  torch::Tensor wrapped =
      torch::from_blob(raw, {num_rows, num_cols}, torch::kFloat);
  bins_mat_ = wrapped.t().to(device);
 }
 // Applies the filterbank: matrix product of `spectrum` with bins_mat_.
 torch::Tensor MelBanks::Compute(const torch::Tensor &spectrum) const {
  torch::Tensor mel_energies = spectrum.mm(bins_mat_);
  return mel_energies;
 }
@ -222,141 +203,4 @@ void ComputeLifterCoeffs(float Q, torch::Tensor *coeffs) {
  }
 }
 void GetEqualLoudnessVector(const MelBanks &mel_banks, torch::Tensor *ans) {
  int32_t n = mel_banks.NumBins();
  // Central frequency of each mel bin.
  const torch::Tensor &f0 = mel_banks.GetCenterFreqs();
  const float *f0_data = f0.data_ptr<float>();
  *ans = torch::empty({1, n}, torch::kFloat);
  float *ans_data = ans->data_ptr<float>();
  for (int32_t i = 0; i < n; ++i) {
    float fsq = f0_data[i] * f0_data[i];
    float fsub = fsq / (fsq + 1.6e5);
    ans_data[i] = fsub * fsub * ((fsq + 1.44e6) / (fsq + 9.61e6));
  }
 }
 // Durbin's recursion - converts autocorrelation coefficients to the LPC
 // pAC  - autocorrelation coefficients [n + 1] (input)
 // pLP  - linear prediction coefficients [n] (output)
 //        (predicted_sn = sum_1^P{a[i-1] * s[n-i]}})
 //        F(z) = 1 / (1 - A(z)), 1 is not stored in the denominator
 // pTmp - temporal place [n] (scratch, overwritten)
 // Returns the prediction error left after the last iteration.
 static float Durbin(int n, const float *pAC, float *pLP, float *pTmp) {
  float err = pAC[0];
  for (int order = 0; order < n; ++order) {
    // next reflection coefficient
    float refl = pAC[order + 1];
    for (int j = 0; j < order; ++j) refl += pLP[j] * pAC[order - j];
    refl /= err;
    // new error; the factor is floored to remove NaNs for constant signal
    float scale = 1 - refl * refl;
    if (scale < 1.0e-5) scale = 1.0e-5;
    err *= scale;
    // new LP coefficients, built in pTmp and then copied back into pLP
    pTmp[order] = -refl;
    for (int j = 0; j < order; ++j) {
      pTmp[j] = pLP[j] - refl * pLP[order - j - 1];
    }
    for (int j = 0; j <= order; ++j) pLP[j] = pTmp[j];
  }
  return err;
 }
 // Compute LP coefficients from autocorrelation coefficients.
 //
 // @param [in] autocorr_in  A 2-D float tensor; each row is one frame and has
 //                          lpc_order + 1 columns.
 // @param [out] lpc_out     Replaced by a (num_frames, lpc_order) float tensor
 //                          on the same device as autocorr_in.
 // @return A 1-D tensor with one value per frame: the log of the value
 //         returned by Durbin() for that frame, on the device of autocorr_in.
 torch::Tensor ComputeLpc(const torch::Tensor &autocorr_in,
                         torch::Tensor *lpc_out) {
  KALDIFEAT_ASSERT(autocorr_in.dim() == 2);
  int32_t num_frames = autocorr_in.size(0);
  int32_t lpc_order = autocorr_in.size(1) - 1;
  *lpc_out = torch::empty({num_frames, lpc_order}, torch::kFloat);
  torch::Tensor ans = torch::empty({num_frames}, torch::kFloat);
  // TODO(fangjun): Durbin runs only on CPU. Implement a CUDA version
  torch::Device saved_device = autocorr_in.device();
  torch::Device cpu("cpu");
  // Durbin() walks every row through a raw pointer and assumes consecutive
  // elements are adjacent in memory. to() does not guarantee that (for a CPU
  // input it returns the tensor as is), so make the layout dense explicitly.
  torch::Tensor in_cpu = autocorr_in.to(cpu).contiguous();
  torch::Tensor tmp = torch::empty_like(*lpc_out);
  int64_t in_stride = in_cpu.stride(0);
  int64_t tmp_stride = tmp.stride(0);
  int64_t lpc_stride = lpc_out->stride(0);
  const float *in_data = in_cpu.data_ptr<float>();
  float *ans_data = ans.data_ptr<float>();
  float *tmp_data = tmp.data_ptr<float>();
  float *lpc_data = lpc_out->data_ptr<float>();
  // see
  // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Parallel.h#L58
  // parallel_for hands out int64_t ranges; keep that width for the indexes.
  at::parallel_for(0, num_frames, 1, [&](int64_t begin, int64_t end) -> void {
    for (int64_t i = begin; i != end; ++i) {
      float ret = Durbin(lpc_order, in_data + i * in_stride,
                         lpc_data + i * lpc_stride, tmp_data + i * tmp_stride);
      if (ret <= 0.0) KALDIFEAT_WARN << "Zero energy in LPC computation";
      ans_data[i] = -logf(1.0 / ret);  // forms the C0 value
    }
  });
  *lpc_out = lpc_out->to(saved_device);
  return ans.to(saved_device);
 }
 // Converts the n LPC coefficients in pLPC into n cepstral coefficients
 // written to pCepst. Entry i depends on the already computed entries
 // 0 .. i-1 of pCepst, so the output is filled in increasing order.
 static void Lpc2CepstrumInternal(int n, const float *pLPC, float *pCepst) {
  for (int32_t k = 0; k < n; ++k) {
    double acc = 0.0;
    for (int32_t j = 0; j < k; ++j) {
      acc += (k - j) * pLPC[j] * pCepst[k - j - 1];
    }
    pCepst[k] = -pLPC[k] - acc / (k + 1);
  }
 }
 // Converts LPC coefficients to cepstral coefficients, one row at a time.
 //
 // @param [in] lpc  A 2-D float tensor of shape (num_frames, lpc_order).
 // @return A float tensor of the same shape, on the same device as `lpc`.
 torch::Tensor Lpc2Cepstrum(const torch::Tensor &lpc) {
  KALDIFEAT_ASSERT(lpc.dim() == 2);
  torch::Device cpu("cpu");
  torch::Device saved_device = lpc.device();
  // TODO(fangjun): support cuda
  // Rows are read through raw pointers with an assumed unit element stride,
  // so make the CPU copy dense; to() alone keeps a strided layout.
  torch::Tensor in_cpu = lpc.to(cpu).contiguous();
  int32_t num_frames = in_cpu.size(0);
  int32_t lpc_order = in_cpu.size(1);
  const float *in_data = in_cpu.data_ptr<float>();
  int64_t in_stride = in_cpu.stride(0);
  torch::Tensor ans = torch::zeros({num_frames, lpc_order}, torch::kFloat);
  int64_t ans_stride = ans.stride(0);
  float *ans_data = ans.data_ptr<float>();
  // parallel_for hands out int64_t ranges; keep that width for the indexes.
  at::parallel_for(0, num_frames, 1, [&](int64_t begin, int64_t end) -> void {
    for (int64_t i = begin; i != end; ++i) {
      Lpc2CepstrumInternal(lpc_order, in_data + i * in_stride,
                           ans_data + i * ans_stride);
    }
  });
  return ans.to(saved_device);
 }
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/mel-computations.h
+++ b/kaldifeat/csrc/mel-computations.h
@ -5,7 +5,6 @@
 // This file is copied/modified from kaldi/src/feat/mel-computations.h
 #include <cmath>
 #include <string>
 #include "kaldifeat/csrc/feature-window.h"
@ -36,14 +35,13 @@ struct MelBanksOptions {
  // Serializes the options one per line as "name: value". The old
  // "MelBanksOptions(name=value, ...)" form is removed as a whole, including
  // its closing htk_mode line, so htk_mode is printed once and no stray ")"
  // is appended.
  std::string ToString() const {
    std::ostringstream os;
-    os << "MelBanksOptions(";
+    os << "num_bins: " << num_bins << "\n";
-    os << "num_bins=" << num_bins << ", ";
+    os << "low_freq: " << low_freq << "\n";
-    os << "low_freq=" << low_freq << ", ";
+    os << "high_freq: " << high_freq << "\n";
-    os << "high_freq=" << high_freq << ", ";
+    os << "vtln_low: " << vtln_low << "\n";
-    os << "vtln_low=" << vtln_low << ", ";
+    os << "vtln_high: " << vtln_high << "\n";
-    os << "vtln_high=" << vtln_high << ", ";
+    os << "debug_mel: " << debug_mel << "\n";
-    os << "debug_mel=" << (debug_mel ? "True" : "False") << ", ";
+    os << "htk_mode: " << htk_mode << "\n";
-    os << "htk_mode=" << (htk_mode ? "True" : "False") << ")";
    return os.str();
  }
 };
@ -76,22 +74,7 @@ class MelBanks {
           const FrameExtractionOptions &frame_opts, float vtln_warp_factor,
           torch::Device device);
-  // Initialize with a 2-d weights matrix
+  int32_t NumBins() const { return static_cast<int32_t>(bins_mat_.size(0)); }
  //
  // Note: This constructor is for Whisper. It does not initialize
  // center_freqs_.
  //
  // @param weights Pointer to the start address of the matrix
  // @param num_rows It equals to number of mel bins
  // @param num_cols It equals to (number of fft bins)/2+1
  MelBanks(const float *weights, int32_t num_rows, int32_t num_cols,
           torch::Device device);
  // CAUTION: we save a transposed version of bins_mat_, so return size(1) here
  int32_t NumBins() const { return static_cast<int32_t>(bins_mat_.size(1)); }
  // returns vector of central freq of each bin; needed by plp code.
  const torch::Tensor &GetCenterFreqs() const { return center_freqs_; }
  torch::Tensor Compute(const torch::Tensor &spectrum) const;
@ -99,15 +82,9 @@ class MelBanks {
  const torch::Tensor &GetBinsMat() const { return bins_mat_; }
 private:
-  // A 2-D matrix. Its shape is NOT [num_bins, num_fft_bins]
+  // A 2-D matrix of shape [num_bins, num_fft_bins]
  // Its shape is [num_fft_bins, num_bins] for non-whisper.
  // For whisper, its shape is [num_fft_bins/2+1, num_bins]
  torch::Tensor bins_mat_;
  // center frequencies of bins, numbered from 0 ... num_bins-1.
  // Needed by GetCenterFreqs().
  torch::Tensor center_freqs_;  // It's always on CPU
  bool debug_;
  bool htk_mode_;
 };
@ -119,26 +96,6 @@ class MelBanks {
 // coeffs is a 1-D float tensor
 void ComputeLifterCoeffs(float Q, torch::Tensor *coeffs);
 void GetEqualLoudnessVector(const MelBanks &mel_banks, torch::Tensor *ans);
 /* Compute LP coefficients from autocorrelation coefficients.
 *
 *  @param [in] autocorr_in  A 2-D tensor. Each row is a frame. Its number of
 *                           columns is lpc_order + 1
 *  @param [out] lpc_coeffs  A 2-D tensor. On return, it has as many rows as the
 *                           input tensor. Its number of columns is lpc_order.
 *
 *  @return Returns log energy of residual in a 1-D tensor. It has as many
 *          elements as the number of rows in `autocorr_in`.
 */
 torch::Tensor ComputeLpc(const torch::Tensor &autocorr_in,
                         torch::Tensor *lpc_coeffs);
 /*
 * @param [in] lpc It is the output argument `lpc_coeffs` in ComputeLpc().
 */
 torch::Tensor Lpc2Cepstrum(const torch::Tensor &lpc);
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_MEL_COMPUTATIONS_H_
--- a/kaldifeat/csrc/online-feature-itf.h
+++ b/kaldifeat/csrc/online-feature-itf.h
@ -1,89 +0,0 @@
 // kaldifeat/csrc/online-feature-itf.h
 //
 // Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/itf/online-feature-itf.h
 #ifndef KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
 #define KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
 #include <utility>
 #include <vector>
 #include "torch/script.h"
 namespace kaldifeat {
 class OnlineFeatureInterface {
 public:
  virtual ~OnlineFeatureInterface() = default;
  virtual int32_t Dim() const = 0;  /// returns the feature dimension.
                                    //
  // Returns frame shift in seconds.  Helps to estimate duration from frame
  // counts.
  virtual float FrameShiftInSeconds() const = 0;
  /// Returns the total number of frames, since the start of the utterance, that
  /// are now available.  In an online-decoding context, this will likely
  /// increase with time as more data becomes available.
  virtual int32_t NumFramesReady() const = 0;
  /// Returns true if this is the last frame.  Frame indices are zero-based, so
  /// the first frame is zero.  IsLastFrame(-1) will return false, unless the
  /// file is empty (which is a case that I'm not sure all the code will handle,
  /// so be careful).  This function may return false for some frame if we
  /// haven't yet decided to terminate decoding, but later true if we decide to
  /// terminate decoding.  This function exists mainly to correctly handle end
  /// effects in feature extraction, and is not a mechanism to determine how
  /// many frames are in the decodable object (as it used to be, and for
  /// backward compatibility, still is, in the Decodable interface).
  virtual bool IsLastFrame(int32_t frame) const = 0;
  /// Gets the feature vector for this frame.  Before calling this for a given
  /// frame, it is assumed that you called NumFramesReady() and it returned a
  /// number greater than "frame".  Otherwise this call will likely crash with
  /// an assert failure.  This function is not declared const, in case there is
  /// some kind of caching going on, but most of the time it shouldn't modify
  /// the class.
  ///
  /// The returned tensor has shape (1, Dim()).
  virtual torch::Tensor GetFrame(int32_t frame) = 0;
  /// This is like GetFrame() but for a collection of frames.  There is a
  /// default implementation that just gets the frames one by one, but it
  /// may be overridden for efficiency by child classes (since sometimes
  /// it's more efficient to do things in a batch).
  ///
  /// The returned tensor has shape (frames.size(), Dim()).
  virtual std::vector<torch::Tensor> GetFrames(
      const std::vector<int32_t> &frames) {
    std::vector<torch::Tensor> features;
    features.reserve(frames.size());
    for (auto i : frames) {
      torch::Tensor f = GetFrame(i);
      features.push_back(std::move(f));
    }
    return features;
 #if 0
    return torch::cat(features, /*dim*/ 0);
 #endif
  }
  /// This would be called from the application, when you get more wave data.
  /// Note: the sampling_rate is typically only provided so the code can assert
  /// that it matches the sampling rate expected in the options.
  virtual void AcceptWaveform(float sampling_rate,
                              const torch::Tensor &waveform) = 0;
  /// InputFinished() tells the class you won't be providing any
  /// more waveform.  This will help flush out the last few frames
  /// of delta or LDA features (it will typically affect the return value
  /// of IsLastFrame.
  virtual void InputFinished() = 0;
 };
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_
--- a/kaldifeat/csrc/online-feature-test.cc
+++ b/kaldifeat/csrc/online-feature-test.cc
@ -1,49 +0,0 @@
 // kaldifeat/csrc/online-feature-test.h
 //
 // Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 #include "kaldifeat/csrc/online-feature.h"
 #include "gtest/gtest.h"
 namespace kaldifeat {
 // With items_to_hold == -1 every pushed tensor must stay retrievable.
 TEST(RecyclingVector, TestUnlimited) {
  constexpr int32_t N = 100;
  RecyclingVector v(-1);
  for (int32_t i = 0; i != N; ++i) {
    v.PushBack(torch::tensor({i, i + 1, i + 2}));
  }
  ASSERT_EQ(v.Size(), N);
  for (int32_t i = 0; i != N; ++i) {
    torch::Tensor expected = torch::tensor({i, i + 1, i + 2});
    torch::Tensor actual = v.At(i);
    EXPECT_TRUE(actual.equal(expected));
  }
 }
 // With a capacity of K only the last K of N pushed tensors stay retrievable;
 // asking for an older index must terminate the process.
 TEST(RecyclingVector, Testlimited) {
  constexpr int32_t K = 3;
  constexpr int32_t N = 10;
  RecyclingVector v(K);
  for (int32_t i = 0; i != N; ++i) {
    v.PushBack(torch::tensor({i, i + 1, i + 2}));
  }
  // Size() counts every push, including the recycled ones.
  ASSERT_EQ(v.Size(), N);
  constexpr int32_t kFirstKept = N - K;
  for (int32_t i = 0; i < kFirstKept; ++i) {
    ASSERT_DEATH(v.At(i), "");
  }
  for (int32_t i = kFirstKept; i != N; ++i) {
    torch::Tensor expected = torch::tensor({i, i + 1, i + 2});
    torch::Tensor actual = v.At(i);
    EXPECT_TRUE(actual.equal(expected));
  }
 }
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/online-feature.cc
+++ b/kaldifeat/csrc/online-feature.cc
@ -1,133 +0,0 @@
 // kaldifeat/csrc/online-feature.cc
 //
 // Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/online-feature.cc
 #include "kaldifeat/csrc/online-feature.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/log.h"
 namespace kaldifeat {
 // A requested capacity of 0 is stored as -1; any other value is kept as is.
 RecyclingVector::RecyclingVector(int32_t items_to_hold)
    : items_to_hold_(items_to_hold != 0 ? items_to_hold : -1),
      first_available_index_(0) {}
 // Returns the item that was pushed as number `index` (zero-based, counted
 // over all pushes). Reports an error through KALDIFEAT_ERR when that item has
 // already been recycled.
 torch::Tensor RecyclingVector::At(int32_t index) const {
  const bool already_recycled = index < first_available_index_;
  if (already_recycled) {
    KALDIFEAT_ERR << "Attempted to retrieve feature vector that was "
                     "already removed by the RecyclingVector (index = "
                  << index << "; "
                  << "first_available_index = " << first_available_index_
                  << "; "
                  << "size = " << Size() << ")";
  }
  // Position inside the deque; 'at' does size checking.
  const int32_t position = index - first_available_index_;
  return items_.at(position);
 }
 // Appends `item`; when the container already holds items_to_hold_ items the
 // oldest one is dropped first and first_available_index_ advances by one.
 void RecyclingVector::PushBack(torch::Tensor item) {
  // Note: -1 is a larger number when treated as unsigned
  const size_t capacity = static_cast<size_t>(items_to_hold_);
  const bool is_full = items_.size() == capacity;
  if (is_full) {
    ++first_available_index_;
    items_.pop_front();
  }
  items_.push_back(item);
 }
 // Number of recycled items plus the number of items currently stored.
 int32_t RecyclingVector::Size() const {
  const int32_t stored = static_cast<int32_t>(items_.size());
  return first_available_index_ + stored;
 }
 // Constructs the feature computer and the window function from `opts`,
 // sizes the frame store from opts.frame_opts.max_feature_vectors, and starts
 // with no input consumed (offset 0, input not finished).
 template <class C>
 OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
    const typename C::Options &opts)
    : computer_(opts),
      window_function_(opts.frame_opts, opts.device),
      features_(opts.frame_opts.max_feature_vectors),
      input_finished_(false),
      waveform_offset_(0) {}
 // Appends `original_waveform` (a 1-D tensor sampled at the configured rate)
 // to the samples that have not been consumed yet and computes all frames that
 // have become available. An empty input is ignored.
 template <class C>
 void OnlineGenericBaseFeature<C>::AcceptWaveform(
    float sampling_rate, const torch::Tensor &original_waveform) {
  if (original_waveform.numel() == 0) return;  // Nothing to do.
  KALDIFEAT_ASSERT(original_waveform.dim() == 1);
  KALDIFEAT_ASSERT(sampling_rate == computer_.GetFrameOptions().samp_freq);
  if (input_finished_) {
    KALDIFEAT_ERR << "AcceptWaveform called after InputFinished() was called.";
  }
  const bool has_leftover = waveform_remainder_.numel() != 0;
  if (has_leftover) {
    waveform_remainder_ =
        torch::cat({waveform_remainder_, original_waveform}, /*dim*/ 0);
  } else {
    waveform_remainder_ = original_waveform;
  }
  ComputeFeatures();
 }
 // Marks the input as complete and runs ComputeFeatures() once more. The flag
 // is set first because ComputeFeatures() passes it to NumFrames().
 template <class C>
 void OnlineGenericBaseFeature<C>::InputFinished() {
  input_finished_ = true;
  ComputeFeatures();
 }
 // Computes every frame that has become available since the last call, stores
 // it in features_, and then drops the leading samples of
 // waveform_remainder_ that no future frame needs (advancing
 // waveform_offset_ accordingly).
 template <class C>
 void OnlineGenericBaseFeature<C>::ComputeFeatures() {
  const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
  int64_t num_samples_total = waveform_offset_ + waveform_remainder_.numel();
  int32_t num_frames_old = features_.Size();
  int32_t num_frames_new =
      NumFrames(num_samples_total, frame_opts, input_finished_);
  KALDIFEAT_ASSERT(num_frames_new >= num_frames_old);
  // note: this online feature-extraction code does not support VTLN.
  float vtln_warp = 1.0;
  for (int32_t frame = num_frames_old; frame < num_frames_new; ++frame) {
    torch::Tensor window =
        ExtractWindow(waveform_offset_, waveform_remainder_, frame, frame_opts);
    // TODO(fangjun): We can compute all feature frames at once
    torch::Tensor this_feature =
        computer_.ComputeFeatures(window.unsqueeze(0), vtln_warp);
    features_.PushBack(this_feature);
  }
  // OK, we will now discard any portion of the signal that will not be
  // necessary to compute frames in the future.
  int64_t first_sample_of_next_frame =
      FirstSampleOfFrame(num_frames_new, frame_opts);
  // Sample counts are 64-bit everywhere else here; do not narrow them.
  int64_t samples_to_discard = first_sample_of_next_frame - waveform_offset_;
  if (samples_to_discard > 0) {
    // discard the leftmost part of the waveform that we no longer need.
    int64_t new_num_samples = waveform_remainder_.numel() - samples_to_discard;
    if (new_num_samples <= 0) {
      // odd, but we'll try to handle it.
      waveform_offset_ += waveform_remainder_.numel();
      // Rebind to a fresh empty tensor instead of calling resize_() in place:
      // waveform_remainder_ may still be the very tensor the caller handed to
      // AcceptWaveform() (it is assigned there without a copy), and an
      // in-place resize would shrink the caller's tensor as well.
      waveform_remainder_ = torch::empty({0}, waveform_remainder_.options());
    } else {
      using torch::indexing::None;
      using torch::indexing::Slice;
      waveform_remainder_ =
          waveform_remainder_.index({Slice(samples_to_discard, None)});
      waveform_offset_ += samples_to_discard;
    }
  }
 }
 // instantiate the templates defined here for MFCC, PLP and filterbank classes.
 template class OnlineGenericBaseFeature<Mfcc>;
 template class OnlineGenericBaseFeature<Plp>;
 template class OnlineGenericBaseFeature<Fbank>;
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/online-feature.h
+++ b/kaldifeat/csrc/online-feature.h
@ -1,127 +0,0 @@
 // kaldifeat/csrc/online-feature.h
 //
 // Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/online-feature.h
 #ifndef KALDIFEAT_CSRC_ONLINE_FEATURE_H_
 #define KALDIFEAT_CSRC_ONLINE_FEATURE_H_
 #include <deque>
 #include "kaldifeat/csrc/feature-fbank.h"
 #include "kaldifeat/csrc/feature-mfcc.h"
 #include "kaldifeat/csrc/feature-plp.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/online-feature-itf.h"
 namespace kaldifeat {
 /// This class serves as a storage for feature vectors with an option to limit
 /// the memory usage by removing old elements. The deleted frames indices are
 /// "remembered" so that regardless of the MAX_ITEMS setting, the user always
 /// provides the indices as if no deletion was being performed.
 /// This is useful when processing very long recordings which would otherwise
 /// cause the memory to eventually blow up when the features are not being
 /// removed.
 class RecyclingVector {
 public:
  /// By default it does not remove any elements.
  ///
  /// @param items_to_hold  Limit on how many items are kept; the default of
  ///                       -1 means nothing is removed (see above).
  explicit RecyclingVector(int32_t items_to_hold = -1);
  ~RecyclingVector() = default;
  // Copying is disabled; the object is neither copy-constructible nor
  // copy-assignable.
  RecyclingVector(const RecyclingVector &) = delete;
  RecyclingVector &operator=(const RecyclingVector &) = delete;
  /// Returns the item stored for `index`. The index is given as if no
  /// deletion had been performed.
  /// NOTE(review): behaviour for an index whose item was already recycled is
  /// defined in the .cc file, which is not visible here -- confirm.
  torch::Tensor At(int32_t index) const;
  /// Appends `item` to the end of the storage.
  void PushBack(torch::Tensor item);
  /// This method returns the size as if no "recycling" had happened,
  /// i.e. equivalent to the number of times the PushBack method has been
  /// called.
  int32_t Size() const;
 private:
  // The items currently held.
  std::deque<torch::Tensor> items_;
  // Value passed to the constructor (-1 by default).
  int32_t items_to_hold_;
  // Presumably the user-visible index of items_.front() -- TODO confirm
  // against the implementation.
  int32_t first_available_index_;
 };
 /// This is a templated class for online feature extraction;
 /// it's templated on a class like MfccComputer or PlpComputer
 /// that does the basic feature extraction.
 template <class C>
 class OnlineGenericBaseFeature : public OnlineFeatureInterface {
 public:
  // Constructor from options class
  explicit OnlineGenericBaseFeature(const typename C::Options &opts);
  // Feature dimension, as reported by the underlying computer.
  int32_t Dim() const override { return computer_.Dim(); }
  // Frame shift converted from the milliseconds stored in the frame options
  // to seconds.
  float FrameShiftInSeconds() const override {
    return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
  }
  // Number of feature frames computed so far (the size of features_, which
  // counts every frame ever pushed).
  int32_t NumFramesReady() const override { return features_.Size(); }
  // Note: IsLastFrame() will only ever return true if you have called
  // InputFinished() (and this frame is the last frame).
  bool IsLastFrame(int32_t frame) const override {
    return input_finished_ && frame == NumFramesReady() - 1;
  }
  // Returns the already-computed feature for `frame` from features_.
  torch::Tensor GetFrame(int32_t frame) override { return features_.At(frame); }
  // This would be called from the application, when you get
  // more wave data.  Note: the sampling_rate is only provided so
  // the code can assert that it matches the sampling rate
  // expected in the options.
  void AcceptWaveform(float sampling_rate,
                      const torch::Tensor &waveform) override;
  // InputFinished() tells the class you won't be providing any
  // more waveform.  This will help flush out the last frame or two
  // of features, in the case where snip-edges == false; it also
  // affects the return value of IsLastFrame().
  void InputFinished() override;
 private:
  // This function computes any additional feature frames that it is possible to
  // compute from 'waveform_remainder_', which at this point may contain more
  // than just a remainder-sized quantity (because AcceptWaveform() appends to
  // waveform_remainder_ before calling this function).  It adds these feature
  // frames to features_, and shifts off any now-unneeded samples of input from
  // waveform_remainder_ while incrementing waveform_offset_ by the same amount.
  void ComputeFeatures();
  C computer_;  // class that does the MFCC or PLP or filterbank computation
  // NOTE(review): its use is not visible in this header -- confirm in the
  // .cc file whether this member is still needed.
  FeatureWindowFunction window_function_;
  // features_ is the Mfcc or Plp or Fbank features that we have already
  // computed.
  RecyclingVector features_;
  // True if the user has called "InputFinished()"
  bool input_finished_;
  // waveform_offset_ is the number of samples of waveform that we have
  // already discarded, i.e. that were prior to 'waveform_remainder_'.
  int64_t waveform_offset_;
  // waveform_remainder_ is a short piece of waveform that we may need to keep
  // after extracting all the whole frames we can (whatever length of feature
  // will be required for the next phase of computation).
  // It is a 1-D tensor
  torch::Tensor waveform_remainder_;
 };
 // Convenience aliases: the online feature extractor specialised for each of
 // the Mfcc, Plp and Fbank feature types.
 using OnlineMfcc = OnlineGenericBaseFeature<Mfcc>;
 using OnlinePlp = OnlineGenericBaseFeature<Plp>;
 using OnlineFbank = OnlineGenericBaseFeature<Fbank>;
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_ONLINE_FEATURE_H_
--- a/kaldifeat/csrc/pitch-functions.h
+++ b/kaldifeat/csrc/pitch-functions.h
@ -1,154 +0,0 @@
 // kaldifeat/csrc/pitch-functions.h
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 // This file is copied/modified from kaldi/src/feat/pitch-functions.h
 #ifndef KALDIFEAT_CSRC_PITCH_FUNCTIONS_H_
 #define KALDIFEAT_CSRC_PITCH_FUNCTIONS_H_
 // References
 //
 // Talkin, David, and W. Bastiaan Kleijn. "A robust algorithm for pitch
 // tracking (RAPT)." coding and synthesis 495 (1995): 518.
 // (https://www.ee.columbia.edu/~dpwe/papers/Talkin95-rapt.pdf)
 //
 // Ghahremani, Pegah, et al. "A pitch extraction algorithm tuned for
 // automatic speech recognition." 2014 IEEE international conference on
 // acoustics, speech and signal processing (ICASSP). IEEE, 2014.
 // (http://danielpovey.com/files/2014_icassp_pitch.pdf)
 #include <string>
 #include "torch/script.h"
 namespace kaldifeat {
 /// Configuration for Kaldi-style pitch extraction. All fields have usable
 /// defaults; see the per-field comments for their meaning.
 struct PitchExtractionOptions {
  // sample frequency in hertz
  // must match the waveform file
  float samp_freq = 16000;
  float frame_shift_ms = 10.0;   // in milliseconds.
  float frame_length_ms = 25.0;  // in milliseconds.
  // Preemphasis coefficient. [use is deprecated.]
  float preemph_coeff = 0.0;
  float min_f0 = 50;            // min f0 to search (Hz)
  float max_f0 = 400;           // max f0 to search (Hz)
  float soft_min_f0 = 10.0;     // Minimum f0, applied in soft way, must not
                                // exceed min-f0
  float penalty_factor = 0.1;   // cost factor for F0 change
  float lowpass_cutoff = 1000;  // cutoff frequency for Low pass filter (Hz)
  // Frequency that we down-sample the signal to.  Must be
  // more than twice lowpass-cutoff
  float resample_freq = 4000;
  float delta_pitch = 0.005;          // the pitch tolerance in pruning lags
  float nccf_ballast = 7000;          // Increasing this factor reduces NCCF for
                                      // quiet frames, helping ensure pitch
                                      // continuity in unvoiced region
  int32_t lowpass_filter_width = 1;   // Integer that determines filter width of
                                      // lowpass filter
  int32_t upsample_filter_width = 5;  // Integer that determines filter width
                                      // when upsampling NCCF
  // Below are newer config variables, not present in the original paper,
  // that relate to the online pitch extraction algorithm.
  // The maximum number of frames of latency that we allow the pitch-processing
  // to introduce, for online operation. If you set this to a large value,
  // there would be no inaccuracy from the Viterbi traceback (but it might make
  // you wait to see the pitch). This is not very relevant for the online
  // operation: normalization-right-context is more relevant, you
  // can just leave this value at zero.
  int32_t max_frames_latency = 0;
  // Only relevant for the function ComputeKaldiPitch which is called by
  // compute-kaldi-pitch-feats. If nonzero, we provide the input as chunks of
  // this size. This affects the energy normalization which has a small effect
  // on the resulting features, especially at the beginning of a file. For best
  // compatibility with online operation (e.g. if you plan to train models for
  // the online-decoding setup), you might want to set this to a small value,
  // like one frame.
  int32_t frames_per_chunk = 0;
  // Only relevant for the function ComputeKaldiPitch which is called by
  // compute-kaldi-pitch-feats, and only relevant if frames_per_chunk is
  // nonzero. If true, it will query the features as soon as they are
  // available, which simulates the first-pass features you would get in online
  // decoding. If false, the features you will get will be the same as those
  // available at the end of the utterance, after InputFinished() has been
  // called: e.g. during lattice rescoring.
  bool simulate_first_pass_online = false;
  // Only relevant for online operation or when emulating online operation
  // (e.g. when setting frames_per_chunk). This is the frame-index on which we
  // recompute the NCCF (e.g. frame-index 500 = after 5 seconds); if the
  // segment ends before this we do it when the segment ends. We do this by
  // re-computing the signal average energy, which affects the NCCF via the
  // "ballast term", scaling the resampled NCCF by a factor derived from the
  // average change in the "ballast term", and re-doing the backtrace
  // computation. Making this infinity would be the most exact, but would
  // introduce unwanted latency at the end of long utterances, for little
  // benefit.
  int32_t recompute_frame = 500;
  // This is a "hidden config" used only for testing the online pitch
  // extraction. If true, we compute the signal root-mean-squared for the
  // ballast term, only up to the current frame, rather than the end of the
  // current chunk of signal. This makes the output insensitive to the
  // chunking, which is useful for testing purposes.
  bool nccf_ballast_online = false;
  bool snip_edges = true;
  torch::Device device{"cpu"};
  PitchExtractionOptions() = default;
  /// Returns the window-size in samples, after resampling.  This is the
  /// "basic window size", not the full window size after extending by max-lag.
  // Because of floating point representation, it is more reliable to divide
  // by 1000 instead of multiplying by 0.001, but it is a bit slower.
  int32_t NccfWindowSize() const {
    return static_cast<int32_t>(resample_freq * frame_length_ms / 1000.0);
  }
  /// Returns the window-shift in samples, after resampling.
  int32_t NccfWindowShift() const {
    return static_cast<int32_t>(resample_freq * frame_shift_ms / 1000.0);
  }
  /// Returns a human-readable dump of every option, one "name: value" line
  /// per field, in declaration order. Boolean fields are streamed with the
  /// default formatting, i.e. as 0 or 1.
  std::string ToString() const {
    std::ostringstream os;
    os << "samp_freq: " << samp_freq << "\n";
    os << "frame_shift_ms: " << frame_shift_ms << "\n";
    os << "frame_length_ms: " << frame_length_ms << "\n";
    os << "preemph_coeff: " << preemph_coeff << "\n";
    os << "min_f0: " << min_f0 << "\n";
    os << "max_f0: " << max_f0 << "\n";
    os << "soft_min_f0: " << soft_min_f0 << "\n";
    os << "penalty_factor: " << penalty_factor << "\n";
    os << "lowpass_cutoff: " << lowpass_cutoff << "\n";
    os << "resample_freq: " << resample_freq << "\n";
    os << "delta_pitch: " << delta_pitch << "\n";
    os << "nccf_ballast: " << nccf_ballast << "\n";
    os << "lowpass_filter_width: " << lowpass_filter_width << "\n";
    os << "upsample_filter_width: " << upsample_filter_width << "\n";
    os << "max_frames_latency: " << max_frames_latency << "\n";
    os << "frames_per_chunk: " << frames_per_chunk << "\n";
    os << "simulate_first_pass_online: " << simulate_first_pass_online << "\n";
    os << "recompute_frame: " << recompute_frame << "\n";
    os << "nccf_ballast_online: " << nccf_ballast_online << "\n";
    os << "snip_edges: " << snip_edges << "\n";
    os << "device: " << device << "\n";
    // The original fell off the end of this non-void function without
    // returning, which is undefined behaviour; hand back the built string.
    return os.str();
  }
 };
 // TODO(fangjun): Implement it
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_PITCH_FUNCTIONS_H_
--- a/kaldifeat/csrc/test_kaldifeat.cc
+++ b/kaldifeat/csrc/test_kaldifeat.cc
@ -2,8 +2,7 @@
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
-#include "torch/all.h"
+#include "torch/torch.h"
 #include "torch/script.h"
 static void TestPreemph() {
  torch::Tensor a = torch::arange(0, 12).reshape({3, 4}).to(torch::kFloat);
--- a/kaldifeat/csrc/whisper-fbank.cc
+++ b/kaldifeat/csrc/whisper-fbank.cc
@ -1,88 +0,0 @@
 /**
 * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #include "kaldifeat/csrc/whisper-fbank.h"
 #include <cmath>
 #include <vector>
 #include "kaldifeat/csrc/mel-computations.h"
 #include "kaldifeat/csrc/whisper-mel-bank.h"
 #include "kaldifeat/csrc/whisper-v3-mel-bank.h"
 #ifndef M_2PI
 #define M_2PI 6.283185307179586476925286766559005
 #endif
 namespace kaldifeat {
 // Builds the mel filter bank matching opts.num_mels (80 or 128; any other
 // value is reported through KALDIFEAT_ERR) on opts.device, then pins the
 // frame-extraction options to the fixed values used for Whisper features.
 WhisperFbankComputer::WhisperFbankComputer(const WhisperFbankOptions &opts)
    : opts_(opts) {
  switch (opts.num_mels) {
    case 80:
      mel_banks_ = std::make_unique<MelBanks>(kWhisperMelArray, kWhisperMelRows,
                                              kWhisperMelCols, opts.device);
      break;
    case 128:
      mel_banks_ = std::make_unique<MelBanks>(kWhisperV3MelArray,
                                              kWhisperV3MelRows,
                                              kWhisperV3MelCols, opts.device);
      break;
    default:
      KALDIFEAT_ERR << "Unsupported num_mels: " << opts.num_mels
                    << ". Support only 80 and 128";
      break;
  }

  // Whatever the caller put into frame_opts for these fields is replaced.
  auto &frame = opts_.frame_opts;
  frame.samp_freq = 16000;
  frame.frame_shift_ms = 10;
  frame.frame_length_ms = 25;
  frame.dither = 0;
  frame.preemph_coeff = 0;
  frame.remove_dc_offset = false;
  frame.window_type = "hann";
  frame.round_to_power_of_two = false;
  frame.snip_edges = false;
 }
 // Converts windowed frames into normalised log-mel features.
 //
 // signal_frame must be 2-D with its second dimension equal to
 // opts_.frame_opts.PaddedWindowSize(). The first two parameters are unused.
 // Steps: power spectrum -> mel filter bank -> log10 (floored at 1e-10) ->
 // clamp to within 8.0 of the global maximum -> (x + 4) / 4.
 torch::Tensor WhisperFbankComputer::Compute(
    torch::Tensor /*signal_raw_log_energy*/, float /*vtln_warp*/,
    const torch::Tensor &signal_frame) {
  KALDIFEAT_ASSERT(signal_frame.dim() == 2);
  KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize());
  // Power spectrum (squared magnitude) of every frame; an rfft over N input
  // points yields N / 2 + 1 frequency bins.
 #if defined(KALDIFEAT_HAS_FFT_NAMESPACE)
  torch::Tensor power_spectrum = torch::fft::rfft(signal_frame).abs().pow(2);
 #else
  // The legacy API returns a trailing dimension of size 2 holding the real
  // part at index 0 and the imaginary part at index 1.
  torch::Tensor spectrum = torch::rfft(signal_frame, 1);
  torch::Tensor re = spectrum.index({"...", 0});
  torch::Tensor im = spectrum.index({"...", 1});
  torch::Tensor power_spectrum = (re.square() + im.square());
 #endif
  torch::Tensor mel_energies = mel_banks_->Compute(power_spectrum);
  torch::Tensor log_mel = torch::clamp_min(mel_energies, 1e-10).log10();
  // The floor is relative to the maximum over the whole tensor, not per frame.
  torch::Tensor dynamic_range_floor = log_mel.max() - 8.0;
  log_mel = torch::maximum(log_mel, dynamic_range_floor);
  return (log_mel + 4.0) / 4.0;
 }
 }  // namespace kaldifeat
--- a/kaldifeat/csrc/whisper-fbank.h
+++ b/kaldifeat/csrc/whisper-fbank.h
@ -1,78 +0,0 @@
 /**
 * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 #ifndef KALDIFEAT_CSRC_WHISPER_FBANK_H_
 #define KALDIFEAT_CSRC_WHISPER_FBANK_H_
 #include <memory>
 #include <string>
 #include <vector>
 #include "kaldifeat/csrc/feature-common.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/mel-computations.h"
 namespace kaldifeat {
 // Options for the Whisper filter-bank feature computer.
 struct WhisperFbankOptions {
  FrameExtractionOptions frame_opts;
  // Number of mel bins; the default is 80, use 128 for large v3.
  int32_t num_mels = 80;
  torch::Device device{"cpu"};

  // Formats the options as
  // WhisperFbankOptions(frame_opts=..., num_mels=..., device="...").
  std::string ToString() const {
    std::ostringstream buf;
    buf << "WhisperFbankOptions("
        << "frame_opts=" << frame_opts.ToString() << ", "
        << "num_mels=" << num_mels << ", "
        << "device=\"" << device << "\")";
    return buf.str();
  }
 };
 // Computes Whisper-style log-mel filter-bank features from windowed frames.
 class WhisperFbankComputer {
 public:
  // note: opts.num_mels (80 or 128) selects the built-in mel filter bank and
  // opts.device is passed to it. The constructor overwrites these frame_opts
  // fields with fixed values, so user-supplied values for them are ignored:
  // samp_freq, frame_shift_ms, frame_length_ms, dither, preemph_coeff,
  // remove_dc_offset, window_type, round_to_power_of_two and snip_edges.
  explicit WhisperFbankComputer(const WhisperFbankOptions &opts = {});
  // Feature dimension: the number of mel bins.
  int32_t Dim() const { return opts_.num_mels; }
  // Frame options as stored in this computer.
  const FrameExtractionOptions &GetFrameOptions() const {
    return opts_.frame_opts;
  }
  const WhisperFbankOptions &GetOptions() const { return opts_; }
  // Computes the features for `signal_frame`; the first two parameters are
  // unnamed and therefore unused.
  torch::Tensor Compute(torch::Tensor /*signal_raw_log_energy*/,
                        float /*vtln_warp*/, const torch::Tensor &signal_frame);
  // if true, compute log_energy_pre_window but after dithering and dc removal
  // Always false for this computer.
  bool NeedRawLogEnergy() const { return false; }
  using Options = WhisperFbankOptions;
 private:
  WhisperFbankOptions opts_;
  // Mel filter bank, created in the constructor.
  std::unique_ptr<MelBanks> mel_banks_;
 };
 using WhisperFbank = OfflineFeatureTpl<WhisperFbankComputer>;
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_CSRC_WHISPER_FBANK_H_
--- a/kaldifeat/csrc/whisper-mel-bank.h
+++ b/kaldifeat/csrc/whisper-mel-bank.h
--- a/kaldifeat/csrc/whisper-v3-mel-bank.h
+++ b/kaldifeat/csrc/whisper-v3-mel-bank.h
--- a/kaldifeat/python/CMakeLists.txt
+++ b/kaldifeat/python/CMakeLists.txt
@ -1,5 +1 @@
 add_subdirectory(csrc)
 if(kaldifeat_BUILD_TESTS)
  add_subdirectory(tests)
 endif()
--- a/kaldifeat/python/csrc/CMakeLists.txt
+++ b/kaldifeat/python/csrc/CMakeLists.txt
@ -2,39 +2,9 @@ add_definitions(-DTORCH_API_INCLUDE_EXTENSION_H)
 pybind11_add_module(_kaldifeat
  feature-fbank.cc
  feature-mfcc.cc
  feature-plp.cc
  feature-spectrogram.cc
  feature-window.cc
  kaldifeat.cc
  mel-computations.cc
  online-feature.cc
  utils.cc
  whisper-fbank.cc
 )
 if(APPLE)
  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
  )
  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
  target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
 endif()
 if(NOT WIN32)
  target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${kaldifeat_rpath_origin}/kaldifeat/${CMAKE_INSTALL_LIBDIR}")
 endif()
 target_link_libraries(_kaldifeat PRIVATE kaldifeat_core)
-if(UNIX AND NOT APPLE)
+target_link_libraries(_kaldifeat PRIVATE ${TORCH_DIR}/lib/libtorch_python.so)
  target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/libtorch_python.so)
  # target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARY})
 elseif(WIN32)
  target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/torch_python.lib)
  # target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARIES})
 endif()
 install(TARGETS _kaldifeat
  DESTINATION ../
 )
--- a/kaldifeat/python/csrc/CPPLINT.cfg
+++ b/kaldifeat/python/csrc/CPPLINT.cfg
@ -1 +0,0 @@
 filter=-runtime/references
--- a/kaldifeat/python/csrc/feature-fbank.cc
+++ b/kaldifeat/python/csrc/feature-fbank.cc
@ -4,11 +4,9 @@
 #include "kaldifeat/python/csrc/feature-fbank.h"
 #include <memory>
 #include <string>
 #include "kaldifeat/csrc/feature-fbank.h"
 #include "kaldifeat/python/csrc/utils.h"
 namespace kaldifeat {
@ -16,35 +14,6 @@ static void PybindFbankOptions(py::module &m) {
  using PyClass = FbankOptions;
  py::class_<PyClass>(m, "FbankOptions")
      .def(py::init<>())
      .def(py::init([](const MelBanksOptions &mel_opts,
                       const FrameExtractionOptions &frame_opts =
                           FrameExtractionOptions(),
                       bool use_energy = false, float energy_floor = 0.0f,
                       bool raw_energy = true, bool htk_compat = false,
                       bool use_log_fbank = true, bool use_power = true,
                       py::object device =
                           py::str("cpu")) -> std::unique_ptr<FbankOptions> {
             auto opts = std::make_unique<FbankOptions>();
             opts->frame_opts = frame_opts;
             opts->mel_opts = mel_opts;
             opts->use_energy = use_energy;
             opts->energy_floor = energy_floor;
             opts->raw_energy = raw_energy;
             opts->htk_compat = htk_compat;
             opts->use_log_fbank = use_log_fbank;
             opts->use_power = use_power;
             std::string s = static_cast<py::str>(device);
             opts->device = torch::Device(s);
             return opts;
           }),
           py::arg("mel_opts"),
           py::arg("frame_opts") = FrameExtractionOptions(),
           py::arg("use_energy") = false, py::arg("energy_floor") = 0.0f,
           py::arg("raw_energy") = true, py::arg("htk_compat") = false,
           py::arg("use_log_fbank") = true, py::arg("use_power") = true,
           py::arg("device") = py::str("cpu"))
      .def_readwrite("frame_opts", &PyClass::frame_opts)
      .def_readwrite("mel_opts", &PyClass::mel_opts)
      .def_readwrite("use_energy", &PyClass::use_energy)
@ -64,15 +33,7 @@ static void PybindFbankOptions(py::module &m) {
            self.device = torch::Device(s);
          })
      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
+           [](const PyClass &self) -> std::string { return self.ToString(); });
      .def("as_dict",
           [](const PyClass &self) -> py::dict { return AsDict(self); })
      .def_static(
          "from_dict",
          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
      .def(py::pickle(
          [](const PyClass &self) -> py::dict { return AsDict(self); },
          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
 }
 static void PybindFbank(py::module &m) {
@ -82,14 +43,7 @@ static void PybindFbank(py::module &m) {
      .def("dim", &PyClass::Dim)
      .def_property_readonly("options", &PyClass::GetOptions)
      .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
-           py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
+           py::arg("vtln_warp"));
      .def(py::pickle(
          [](const PyClass &self) -> py::dict {
            return AsDict(self.GetOptions());
          },
          [](py::dict dict) -> std::unique_ptr<PyClass> {
            return std::make_unique<PyClass>(FbankOptionsFromDict(dict));
          }));
 }
 void PybindFeatureFbank(py::module &m) {
--- a/kaldifeat/python/csrc/feature-mfcc.cc
+++ b/kaldifeat/python/csrc/feature-mfcc.cc
@ -4,11 +4,7 @@
 #include "kaldifeat/python/csrc/feature-mfcc.h"
 #include <memory>
 #include <string>
 #include "kaldifeat/csrc/feature-mfcc.h"
 #include "kaldifeat/python/csrc/utils.h"
 namespace kaldifeat {
@ -16,35 +12,6 @@ void PybindMfccOptions(py::module &m) {
  using PyClass = MfccOptions;
  py::class_<PyClass>(m, "MfccOptions")
      .def(py::init<>())
      .def(py::init([](const MelBanksOptions &mel_opts,
                       const FrameExtractionOptions &frame_opts =
                           FrameExtractionOptions(),
                       int32_t num_ceps = 13, bool use_energy = true,
                       float energy_floor = 0.0, bool raw_energy = true,
                       float cepstral_lifter = 22.0, bool htk_compat = false,
                       py::object device =
                           py::str("cpu")) -> std::unique_ptr<MfccOptions> {
             auto opts = std::make_unique<MfccOptions>();
             opts->frame_opts = frame_opts;
             opts->mel_opts = mel_opts;
             opts->num_ceps = num_ceps;
             opts->use_energy = use_energy;
             opts->energy_floor = energy_floor;
             opts->raw_energy = raw_energy;
             opts->cepstral_lifter = cepstral_lifter;
             opts->htk_compat = htk_compat;
             std::string s = static_cast<py::str>(device);
             opts->device = torch::Device(s);
             return opts;
           }),
           py::arg("mel_opts"),
           py::arg("frame_opts") = FrameExtractionOptions(),
           py::arg("num_ceps") = 13, py::arg("use_energy") = true,
           py::arg("energy_floor") = 0.0f, py::arg("raw_energy") = true,
           py::arg("cepstral_lifter") = 22.0, py::arg("htk_compat") = false,
           py::arg("device") = py::str("cpu"))
      .def_readwrite("frame_opts", &PyClass::frame_opts)
      .def_readwrite("mel_opts", &PyClass::mel_opts)
      .def_readwrite("num_ceps", &PyClass::num_ceps)
@ -64,15 +31,7 @@ void PybindMfccOptions(py::module &m) {
            self.device = torch::Device(s);
          })
      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
+           [](const PyClass &self) -> std::string { return self.ToString(); });
      .def("as_dict",
           [](const PyClass &self) -> py::dict { return AsDict(self); })
      .def_static(
          "from_dict",
          [](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); })
      .def(py::pickle(
          [](const PyClass &self) -> py::dict { return AsDict(self); },
          [](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); }));
 }
 static void PybindMfcc(py::module &m) {
@ -82,14 +41,7 @@ static void PybindMfcc(py::module &m) {
      .def("dim", &PyClass::Dim)
      .def_property_readonly("options", &PyClass::GetOptions)
      .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
-           py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
+           py::arg("vtln_warp"));
      .def(py::pickle(
          [](const PyClass &self) -> py::dict {
            return AsDict(self.GetOptions());
          },
          [](py::dict dict) -> std::unique_ptr<PyClass> {
            return std::make_unique<PyClass>(MfccOptionsFromDict(dict));
          }));
 }
 void PybindFeatureMfcc(py::module &m) {
--- a/kaldifeat/python/csrc/feature-plp.cc
+++ b/kaldifeat/python/csrc/feature-plp.cc
@ -1,109 +0,0 @@
 // kaldifeat/python/csrc/feature-plp.cc
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 #include "kaldifeat/python/csrc/feature-plp.h"
 #include <memory>
 #include <string>
 #include "kaldifeat/csrc/feature-plp.h"
 #include "kaldifeat/python/csrc/utils.h"
 namespace kaldifeat {
 // Registers PlpOptions with the Python module `m` under the class name
 // "PlpOptions": constructors, read/write fields, a `device` property,
 // __str__, dict conversion and pickle support.
 void PybindPlpOptions(py::module &m) {
  using PyClass = PlpOptions;
  py::class_<PyClass>(m, "PlpOptions")
      // Default constructor: every field keeps its C++ default.
      .def(py::init<>())
      // Keyword constructor. `mel_opts` is the only argument without a
      // default. `device` is accepted as a generic Python object, converted
      // to a Python str and then parsed by torch::Device.
      .def(py::init([](const MelBanksOptions &mel_opts,
                       const FrameExtractionOptions &frame_opts =
                           FrameExtractionOptions(),
                       int32_t lpc_order = 12, int32_t num_ceps = 13,
                       bool use_energy = true, float energy_floor = 0.0,
                       bool raw_energy = true, float compress_factor = 0.33333,
                       int32_t cepstral_lifter = 22, float cepstral_scale = 1.0,
                       bool htk_compat = false,
                       py::object device =
                           py::str("cpu")) -> std::unique_ptr<PlpOptions> {
             auto opts = std::make_unique<PlpOptions>();
             opts->frame_opts = frame_opts;
             opts->mel_opts = mel_opts;
             opts->lpc_order = lpc_order;
             opts->num_ceps = num_ceps;
             opts->use_energy = use_energy;
             opts->energy_floor = energy_floor;
             opts->raw_energy = raw_energy;
             opts->compress_factor = compress_factor;
             opts->cepstral_lifter = cepstral_lifter;
             opts->cepstral_scale = cepstral_scale;
             opts->htk_compat = htk_compat;
             std::string s = static_cast<py::str>(device);
             opts->device = torch::Device(s);
             return opts;
           }),
           // The Python-side defaults below mirror the lambda's defaults.
           py::arg("mel_opts"),
           py::arg("frame_opts") = FrameExtractionOptions(),
           py::arg("lpc_order") = 12, py::arg("num_ceps") = 13,
           py::arg("use_energy") = true, py::arg("energy_floor") = 0.0,
           py::arg("raw_energy") = true, py::arg("compress_factor") = 0.33333,
           py::arg("cepstral_lifter") = 22, py::arg("cepstral_scale") = 1.0,
           py::arg("htk_compat") = false, py::arg("device") = py::str("cpu"))
      .def_readwrite("frame_opts", &PyClass::frame_opts)
      .def_readwrite("mel_opts", &PyClass::mel_opts)
      .def_readwrite("lpc_order", &PyClass::lpc_order)
      .def_readwrite("num_ceps", &PyClass::num_ceps)
      .def_readwrite("use_energy", &PyClass::use_energy)
      .def_readwrite("energy_floor", &PyClass::energy_floor)
      .def_readwrite("raw_energy", &PyClass::raw_energy)
      .def_readwrite("compress_factor", &PyClass::compress_factor)
      .def_readwrite("cepstral_lifter", &PyClass::cepstral_lifter)
      .def_readwrite("cepstral_scale", &PyClass::cepstral_scale)
      .def_readwrite("htk_compat", &PyClass::htk_compat)
      // `device` is exposed as a property rather than a plain field: the
      // getter builds a Python torch.device from the device string and the
      // setter parses the str() of whatever object is assigned.
      .def_property(
          "device",
          [](const PyClass &self) -> py::object {
            py::object ans = py::module_::import("torch").attr("device");
            return ans(self.device.str());
          },
          [](PyClass &self, py::object obj) -> void {
            std::string s = static_cast<py::str>(obj);
            self.device = torch::Device(s);
          })
      .def("__str__",
           [](const PyClass &self) -> std::string { return self.ToString(); })
      .def("as_dict",
           [](const PyClass &self) -> py::dict { return AsDict(self); })
      .def_static(
          "from_dict",
          [](py::dict dict) -> PyClass { return PlpOptionsFromDict(dict); })
      // Pickle support: the state is the dict produced by AsDict and is
      // restored with PlpOptionsFromDict.
      .def(py::pickle(
          [](const PyClass &self) -> py::dict { return AsDict(self); },
          [](py::dict dict) -> PyClass { return PlpOptionsFromDict(dict); }));
 }
 // Registers the Plp feature extractor with the Python module `m` under the
 // class name "Plp".
 static void PybindPlp(py::module &m) {
  using PyClass = Plp;
  py::class_<PyClass>(m, "Plp")
      .def(py::init<const PlpOptions &>(), py::arg("opts"))
      .def("dim", &PyClass::Dim)
      .def_property_readonly("options", &PyClass::GetOptions)
      // The call guard releases the GIL for the duration of the call.
      .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
           py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>())
      // Pickle support: only the options are saved (as a dict); unpickling
      // constructs a new Plp from those options.
      .def(py::pickle(
          [](const PyClass &self) -> py::dict {
            return AsDict(self.GetOptions());
          },
          [](py::dict dict) -> std::unique_ptr<PyClass> {
            return std::make_unique<PyClass>(PlpOptionsFromDict(dict));
          }));
 }
 // Entry point for the PLP bindings: registers PlpOptions first and then
 // Plp on the module `m`.
 void PybindFeaturePlp(py::module &m) {
  PybindPlpOptions(m);
  PybindPlp(m);
 }
 }  // namespace kaldifeat
--- a/kaldifeat/python/csrc/feature-plp.h
+++ b/kaldifeat/python/csrc/feature-plp.h
@ -1,16 +0,0 @@
 // kaldifeat/python/csrc/feature-plp.h
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 #ifndef KALDIFEAT_PYTHON_CSRC_FEATURE_PLP_H_
 #define KALDIFEAT_PYTHON_CSRC_FEATURE_PLP_H_
 #include "kaldifeat/python/csrc/kaldifeat.h"
 namespace kaldifeat {
 // Adds the PLP-related Python bindings to the module `m`.
 void PybindFeaturePlp(py::module &m);
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_PYTHON_CSRC_FEATURE_PLP_H_
--- a/kaldifeat/python/csrc/feature-spectrogram.cc
+++ b/kaldifeat/python/csrc/feature-spectrogram.cc
@ -1,91 +0,0 @@
 // kaldifeat/python/csrc/feature-spectrogram.cc
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 #include "kaldifeat/python/csrc/feature-spectrogram.h"
 #include <memory>
 #include <string>
 #include "kaldifeat/csrc/feature-spectrogram.h"
 #include "kaldifeat/python/csrc/utils.h"
 namespace kaldifeat {
 // Binds `SpectrogramOptions` into the Python module `m`.
 //
 // Provides a keyword constructor in which every argument has a default,
 // read/write access to the plain fields, a `device` property that is
 // returned as a torch.device and set from a string-convertible object,
 // `__str__`, `as_dict` / `from_dict`, and pickle support via the dict form.
 static void PybindSpectrogramOptions(py::module &m) {
  using Opts = SpectrogramOptions;
  py::class_<Opts> cls(m, "SpectrogramOptions");

  cls.def(py::init([](const FrameExtractionOptions &frame_opts =
                          FrameExtractionOptions(),
                      float energy_floor = 0.0, bool raw_energy = true,
                      bool return_raw_fft = false,
                      py::object device =
                          py::str("cpu")) -> std::unique_ptr<Opts> {
            auto ans = std::make_unique<Opts>();
            ans->frame_opts = frame_opts;
            ans->energy_floor = energy_floor;
            ans->raw_energy = raw_energy;
            ans->return_raw_fft = return_raw_fft;
            // Convert the Python-side device to a string and build the
            // torch::Device from it.
            ans->device = torch::Device(std::string(py::str(device)));
            return ans;
          }),
          py::arg("frame_opts") = FrameExtractionOptions(),
          py::arg("energy_floor") = 0.0, py::arg("raw_energy") = true,
          py::arg("return_raw_fft") = false,
          py::arg("device") = py::str("cpu"));

  cls.def_readwrite("frame_opts", &Opts::frame_opts);
  cls.def_readwrite("energy_floor", &Opts::energy_floor);
  cls.def_readwrite("raw_energy", &Opts::raw_energy);
  // .def_readwrite("return_raw_fft", &Opts::return_raw_fft) // not
  // implemented yet

  cls.def_property(
      "device",
      // Getter: wrap the device string in Python's torch.device.
      [](const Opts &self) -> py::object {
        py::object torch_device = py::module_::import("torch").attr("device");
        return torch_device(self.device.str());
      },
      // Setter: go through a string to construct the torch::Device.
      [](Opts &self, py::object obj) -> void {
        self.device = torch::Device(std::string(py::str(obj)));
      });

  cls.def("__str__",
          [](const Opts &self) -> std::string { return self.ToString(); });
  cls.def("as_dict", [](const Opts &self) -> py::dict { return AsDict(self); });
  cls.def_static("from_dict", [](py::dict dict) -> Opts {
    return SpectrogramOptionsFromDict(dict);
  });

  // Pickle state is the same dict produced by `as_dict`.
  cls.def(py::pickle(
      [](const Opts &self) -> py::dict { return AsDict(self); },
      [](py::dict state) -> Opts { return SpectrogramOptionsFromDict(state); }));
 }
 // Registers the `Spectrogram` feature computer on the Python module `m`.
 //
 // Exposed API: construction from a SpectrogramOptions (`opts`), `dim()`, a
 // read-only `options` property, `compute_features(wave, vtln_warp)` (called
 // with the GIL released) and pickle support through the options dict.
 static void PybindSpectrogram(py::module &m) {
  py::class_<Spectrogram> spec(m, "Spectrogram");

  spec.def(py::init<const SpectrogramOptions &>(), py::arg("opts"));
  spec.def("dim", &Spectrogram::Dim);
  spec.def_property_readonly("options", &Spectrogram::GetOptions);

  // The GIL is dropped for the duration of the feature computation.
  spec.def("compute_features", &Spectrogram::ComputeFeatures, py::arg("wave"),
           py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>());

  // Pickling stores only the options dict; unpickling rebuilds from it.
  spec.def(py::pickle(
      [](const Spectrogram &self) -> py::dict {
        return AsDict(self.GetOptions());
      },
      [](py::dict state) -> std::unique_ptr<Spectrogram> {
        return std::make_unique<Spectrogram>(SpectrogramOptionsFromDict(state));
      }));
 }
 // Public entry point for the spectrogram bindings: registers
 // `SpectrogramOptions` first and then `Spectrogram` on module `m`.
 void PybindFeatureSpectrogram(py::module &m) {
  PybindSpectrogramOptions(m);
  PybindSpectrogram(m);
 }
 }  // namespace kaldifeat
--- a/kaldifeat/python/csrc/feature-spectrogram.h
+++ b/kaldifeat/python/csrc/feature-spectrogram.h
@ -1,16 +0,0 @@
 // kaldifeat/python/csrc/feature-spectrogram.h
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 #ifndef KALDIFEAT_PYTHON_CSRC_FEATURE_SPECTROGRAM_H_
 #define KALDIFEAT_PYTHON_CSRC_FEATURE_SPECTROGRAM_H_
 #include "kaldifeat/python/csrc/kaldifeat.h"
 namespace kaldifeat {
 // Registers the spectrogram-related Python bindings on module `m`.
 void PybindFeatureSpectrogram(py::module &m);
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_PYTHON_CSRC_FEATURE_SPECTROGRAM_H_
--- a/kaldifeat/python/csrc/feature-window.cc
+++ b/kaldifeat/python/csrc/feature-window.cc
@ -4,78 +4,36 @@
 #include "kaldifeat/python/csrc/feature-window.h"
 #include <memory>
 #include <string>
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/python/csrc/utils.h"
 namespace kaldifeat {
 static void PybindFrameExtractionOptions(py::module &m) {
-  using PyClass = FrameExtractionOptions;
+  py::class_<FrameExtractionOptions>(m, "FrameExtractionOptions")
-  py::class_<PyClass>(m, "FrameExtractionOptions")
+      .def(py::init<>())
-      .def(
+      .def_readwrite("samp_freq", &FrameExtractionOptions::samp_freq)
-          py::init([](float samp_freq = 16000, float frame_shift_ms = 10.0f,
+      .def_readwrite("frame_shift_ms", &FrameExtractionOptions::frame_shift_ms)
-                      float frame_length_ms = 25.0f, float dither = 1.0f,
+      .def_readwrite("frame_length_ms",
-                      float preemph_coeff = 0.97f, bool remove_dc_offset = true,
+                     &FrameExtractionOptions::frame_length_ms)
-                      const std::string &window_type = "povey",
+      .def_readwrite("dither", &FrameExtractionOptions::dither)
-                      bool round_to_power_of_two = true,
+      .def_readwrite("preemph_coeff", &FrameExtractionOptions::preemph_coeff)
-                      float blackman_coeff = 0.42f, bool snip_edges = true,
+      .def_readwrite("remove_dc_offset",
-                      int32_t max_feature_vectors =
+                     &FrameExtractionOptions::remove_dc_offset)
-                          -1) -> std::unique_ptr<FrameExtractionOptions> {
+      .def_readwrite("window_type", &FrameExtractionOptions::window_type)
-            auto opts = std::make_unique<FrameExtractionOptions>();
+      .def_readwrite("round_to_power_of_two",
-
+                     &FrameExtractionOptions::round_to_power_of_two)
-            opts->samp_freq = samp_freq;
+      .def_readwrite("blackman_coeff", &FrameExtractionOptions::blackman_coeff)
-            opts->frame_shift_ms = frame_shift_ms;
+      .def_readwrite("snip_edges", &FrameExtractionOptions::snip_edges)
            opts->frame_length_ms = frame_length_ms;
            opts->dither = dither;
            opts->preemph_coeff = preemph_coeff;
            opts->remove_dc_offset = remove_dc_offset;
            opts->window_type = window_type;
            opts->round_to_power_of_two = round_to_power_of_two;
            opts->blackman_coeff = blackman_coeff;
            opts->snip_edges = snip_edges;
            opts->max_feature_vectors = max_feature_vectors;
            return opts;
          }),
          py::arg("samp_freq") = 16000, py::arg("frame_shift_ms") = 10.0f,
          py::arg("frame_length_ms") = 25.0f, py::arg("dither") = 1.0f,
          py::arg("preemph_coeff") = 0.97f, py::arg("remove_dc_offset") = true,
          py::arg("window_type") = "povey",
          py::arg("round_to_power_of_two") = true,
          py::arg("blackman_coeff") = 0.42f, py::arg("snip_edges") = true,
          py::arg("max_feature_vectors") = -1)
      .def_readwrite("samp_freq", &PyClass::samp_freq)
      .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
      .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
      .def_readwrite("dither", &PyClass::dither)
      .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
      .def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
      .def_readwrite("window_type", &PyClass::window_type)
      .def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
      .def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
      .def_readwrite("snip_edges", &PyClass::snip_edges)
      .def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors)
      .def("as_dict",
           [](const PyClass &self) -> py::dict { return AsDict(self); })
      .def_static("from_dict",
                  [](py::dict dict) -> PyClass {
                    return FrameExtractionOptionsFromDict(dict);
                  })
 #if 0
      .def_readwrite("allow_downsample",
-                     &PyClass::allow_downsample)
+                     &FrameExtractionOptions::allow_downsample)
-      .def_readwrite("allow_upsample", &PyClass::allow_upsample)
+      .def_readwrite("allow_upsample", &FrameExtractionOptions::allow_upsample)
      .def_readwrite("max_feature_vectors",
                     &FrameExtractionOptions::max_feature_vectors)
 #endif
-      .def("__str__",
+      .def("__str__", [](const FrameExtractionOptions &self) -> std::string {
-           [](const PyClass &self) -> std::string { return self.ToString(); })
+        return self.ToString();
-      .def(py::pickle(
+      });
          [](const PyClass &self) -> py::dict { return AsDict(self); },
          [](py::dict dict) -> PyClass {
            return FrameExtractionOptionsFromDict(dict);
          }));
  m.def("num_frames", &NumFrames, py::arg("num_samples"), py::arg("opts"),
        py::arg("flush") = true);
--- a/kaldifeat/python/csrc/kaldifeat.cc
+++ b/kaldifeat/python/csrc/kaldifeat.cc
@ -7,12 +7,8 @@
 #include "kaldifeat/csrc/feature-fbank.h"
 #include "kaldifeat/python/csrc/feature-fbank.h"
 #include "kaldifeat/python/csrc/feature-mfcc.h"
 #include "kaldifeat/python/csrc/feature-plp.h"
 #include "kaldifeat/python/csrc/feature-spectrogram.h"
 #include "kaldifeat/python/csrc/feature-window.h"
 #include "kaldifeat/python/csrc/mel-computations.h"
 #include "kaldifeat/python/csrc/online-feature.h"
 #include "kaldifeat/python/csrc/whisper-fbank.h"
 #include "torch/torch.h"
 namespace kaldifeat {
@ -23,11 +19,7 @@ PYBIND11_MODULE(_kaldifeat, m) {
  PybindFeatureWindow(m);
  PybindMelComputations(m);
  PybindFeatureFbank(m);
  PybindWhisperFbank(&m);
  PybindFeatureMfcc(m);
  PybindFeaturePlp(m);
  PybindFeatureSpectrogram(m);
  PybindOnlineFeature(m);
 }
 }  // namespace kaldifeat
--- a/kaldifeat/python/csrc/kaldifeat.h
+++ b/kaldifeat/python/csrc/kaldifeat.h
@ -6,7 +6,6 @@
 #define KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_
 #include "pybind11/pybind11.h"
 #include "torch/torch.h"
 namespace py = pybind11;
 #endif  // KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_
--- a/kaldifeat/python/csrc/mel-computations.cc
+++ b/kaldifeat/python/csrc/mel-computations.cc
@ -2,37 +2,16 @@
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 #include "kaldifeat/python/csrc/mel-computations.h"
 #include <memory>
 #include <string>
 #include "kaldifeat/csrc/mel-computations.h"
-#include "kaldifeat/python/csrc/utils.h"
+
 #include "kaldifeat/python/csrc/feature-window.h"
 namespace kaldifeat {
 static void PybindMelBanksOptions(py::module &m) {
  using PyClass = MelBanksOptions;
  py::class_<PyClass>(m, "MelBanksOptions")
-      .def(py::init(
+      .def(py::init<>())
               [](int32_t num_bins = 25, float low_freq = 20,
                  float high_freq = 0, float vtln_low = 100,
                  float vtln_high = -500,
                  bool debug_mel = false) -> std::unique_ptr<MelBanksOptions> {
                 auto opts = std::make_unique<MelBanksOptions>();
                 opts->num_bins = num_bins;
                 opts->low_freq = low_freq;
                 opts->high_freq = high_freq;
                 opts->vtln_low = vtln_low;
                 opts->vtln_high = vtln_high;
                 return opts;
               }),
           py::arg("num_bins") = 25, py::arg("low_freq") = 20,
           py::arg("high_freq") = 0, py::arg("vtln_low") = 100,
           py::arg("vtln_high") = -500, py::arg("debug_mel") = false)
      .def_readwrite("num_bins", &PyClass::num_bins)
      .def_readwrite("low_freq", &PyClass::low_freq)
      .def_readwrite("high_freq", &PyClass::high_freq)
@ -41,18 +20,8 @@ static void PybindMelBanksOptions(py::module &m) {
      .def_readwrite("debug_mel", &PyClass::debug_mel)
      .def_readwrite("htk_mode", &PyClass::htk_mode)
      .def("__str__",
-           [](const PyClass &self) -> std::string { return self.ToString(); })
+           [](const PyClass &self) -> std::string { return self.ToString(); });
-      .def("as_dict",
+  ;
           [](const PyClass &self) -> py::dict { return AsDict(self); })
      .def_static("from_dict",
                  [](py::dict dict) -> PyClass {
                    return MelBanksOptionsFromDict(dict);
                  })
      .def(py::pickle(
          [](const PyClass &self) -> py::dict { return AsDict(self); },
          [](py::dict dict) -> PyClass {
            return MelBanksOptionsFromDict(dict);
          }));
 }
 // Entry point for the mel-computation bindings; currently this only
 // registers the MelBanksOptions class on module `m`.
 void PybindMelComputations(py::module &m) {
  PybindMelBanksOptions(m);
 }
--- a/kaldifeat/python/csrc/online-feature.cc
+++ b/kaldifeat/python/csrc/online-feature.cc
@ -1,39 +0,0 @@
 // kaldifeat/python/csrc/online-feature.cc
 //
 // Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 #include "kaldifeat/python/csrc/online-feature.h"
 #include <string>
 #include "kaldifeat/csrc/online-feature.h"
 namespace kaldifeat {
 // Binds OnlineGenericBaseFeature<C> to Python under the name `class_name`.
 //
 // @param m               Module that receives the new class.
 // @param class_name      Python-visible name of the class.
 // @param class_help_doc  Optional docstring attached to the class.
 //
 // `get_frames` and `accept_waveform` are invoked with the GIL released;
 // the remaining members are bound without a call guard.
 template <typename C>
 void PybindOnlineFeatureTpl(py::module &m, const std::string &class_name,
                            const std::string &class_help_doc = "") {
  using Online = OnlineGenericBaseFeature<C>;
  using Release = py::call_guard<py::gil_scoped_release>;

  py::class_<Online> cls(m, class_name.c_str(), class_help_doc.c_str());

  cls.def(py::init<const typename C::Options &>(), py::arg("opts"));

  // Read-only properties.
  cls.def_property_readonly("dim", &Online::Dim);
  cls.def_property_readonly("frame_shift_in_seconds",
                            &Online::FrameShiftInSeconds);
  cls.def_property_readonly("num_frames_ready", &Online::NumFramesReady);

  // Frame access.
  cls.def("is_last_frame", &Online::IsLastFrame, py::arg("frame"));
  cls.def("get_frame", &Online::GetFrame, py::arg("frame"));
  cls.def("get_frames", &Online::GetFrames, py::arg("frames"), Release());

  // Feeding audio.
  cls.def("accept_waveform", &Online::AcceptWaveform, py::arg("sampling_rate"),
          py::arg("waveform"), Release());
  cls.def("input_finished", &Online::InputFinished);
 }
 // Registers the online (streaming) feature classes on module `m`:
 // `OnlineMfcc`, `OnlineFbank` and `OnlinePlp`, each an instantiation of
 // PybindOnlineFeatureTpl with an empty class docstring.
 void PybindOnlineFeature(py::module &m) {
  PybindOnlineFeatureTpl<Mfcc>(m, "OnlineMfcc");
  PybindOnlineFeatureTpl<Fbank>(m, "OnlineFbank");
  PybindOnlineFeatureTpl<Plp>(m, "OnlinePlp");
 }
 }  // namespace kaldifeat
--- a/kaldifeat/python/csrc/online-feature.h
+++ b/kaldifeat/python/csrc/online-feature.h
@ -1,16 +0,0 @@
 // kaldifeat/python/csrc/online-feature.h
 //
 // Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 #ifndef KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
 #define KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
 #include "kaldifeat/python/csrc/kaldifeat.h"
 namespace kaldifeat {
 // Registers the online feature classes (OnlineMfcc, OnlineFbank, OnlinePlp)
 // on module `m`.
 void PybindOnlineFeature(py::module &m);
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_
--- a/kaldifeat/python/csrc/utils.cc
+++ b/kaldifeat/python/csrc/utils.cc
@ -1,284 +0,0 @@
 // kaldifeat/python/csrc/utils.cc
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 #include "kaldifeat/python/csrc/utils.h"
 #include <string>
 #include "kaldifeat/csrc/feature-window.h"
 // FROM_DICT(type, key): if the local py::dict named `dict` contains the key
 // "key", convert the stored value with py::<type> and assign it to
 // `opts.key`; otherwise `opts.key` keeps its current (default) value.
 // Requires locals named `dict` and `opts` at the expansion site.
 //
 // The body is wrapped in do { ... } while (0) so the macro expands to exactly
 // one statement: `FROM_DICT(...);` is then safe inside an unbraced if/else
 // and the trailing semicolon no longer produces an extra empty statement.
 #define FROM_DICT(type, key)           \
  do {                                 \
    if (dict.contains(#key)) {         \
      opts.key = py::type(dict[#key]); \
    }                                  \
  } while (0)
 // AS_DICT(key): store `opts.key` in `dict` under the string key "key".
 // Requires locals named `dict` and `opts` at the expansion site.
 #define AS_DICT(key) dict[#key] = opts.key
 namespace kaldifeat {
 // Builds a FrameExtractionOptions from a Python dict.
 //
 // Starts from a default-constructed FrameExtractionOptions and overwrites
 // only the fields whose names appear as keys in `dict` (see FROM_DICT);
 // fields whose key is absent keep their default value.
 FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
  FrameExtractionOptions opts;
  FROM_DICT(float_, samp_freq);
  FROM_DICT(float_, frame_shift_ms);
  FROM_DICT(float_, frame_length_ms);
  FROM_DICT(float_, dither);
  FROM_DICT(float_, preemph_coeff);
  FROM_DICT(bool_, remove_dc_offset);
  FROM_DICT(str, window_type);
  FROM_DICT(bool_, round_to_power_of_two);
  FROM_DICT(float_, blackman_coeff);
  FROM_DICT(bool_, snip_edges);
  FROM_DICT(int_, max_feature_vectors);
  return opts;
 }
 // Converts `opts` into a Python dict keyed by field name (see AS_DICT).
 // The keys written here are the same ones that
 // FrameExtractionOptionsFromDict reads.
 py::dict AsDict(const FrameExtractionOptions &opts) {
  py::dict dict;
  AS_DICT(samp_freq);
  AS_DICT(frame_shift_ms);
  AS_DICT(frame_length_ms);
  AS_DICT(dither);
  AS_DICT(preemph_coeff);
  AS_DICT(remove_dc_offset);
  AS_DICT(window_type);
  AS_DICT(round_to_power_of_two);
  AS_DICT(blackman_coeff);
  AS_DICT(snip_edges);
  AS_DICT(max_feature_vectors);
  return dict;
 }
 // Builds a MelBanksOptions from a Python dict.
 //
 // Starts from a default-constructed MelBanksOptions and overwrites only the
 // fields whose names appear as keys in `dict` (see FROM_DICT).
 MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
  MelBanksOptions opts;
  FROM_DICT(int_, num_bins);
  FROM_DICT(float_, low_freq);
  FROM_DICT(float_, high_freq);
  FROM_DICT(float_, vtln_low);
  FROM_DICT(float_, vtln_high);
  FROM_DICT(bool_, debug_mel);
  FROM_DICT(bool_, htk_mode);
  return opts;
 }
 // Converts `opts` into a Python dict keyed by field name (see AS_DICT).
 // The keys written here are the same ones MelBanksOptionsFromDict reads.
 py::dict AsDict(const MelBanksOptions &opts) {
  py::dict dict;
  AS_DICT(num_bins);
  AS_DICT(low_freq);
  AS_DICT(high_freq);
  AS_DICT(vtln_low);
  AS_DICT(vtln_high);
  AS_DICT(debug_mel);
  AS_DICT(htk_mode);
  return dict;
 }
 // Builds a FbankOptions from a Python dict.
 //
 // Starts from a default-constructed FbankOptions; every key is optional and
 // a missing key leaves the corresponding default in place.
 FbankOptions FbankOptionsFromDict(py::dict dict) {
  FbankOptions opts;
  // Nested option groups are themselves stored as dicts.
  if (dict.contains("frame_opts")) {
    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  if (dict.contains("mel_opts")) {
    opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
  }
  FROM_DICT(bool_, use_energy);
  FROM_DICT(float_, energy_floor);
  FROM_DICT(bool_, raw_energy);
  FROM_DICT(bool_, htk_compat);
  FROM_DICT(bool_, use_log_fbank);
  FROM_DICT(bool_, use_power);
  // The device value is converted to a string and parsed by torch::Device.
  if (dict.contains("device")) {
    opts.device = torch::Device(std::string(py::str(dict["device"])));
  }
  return opts;
 }
 // Converts `opts` into a Python dict.
 //
 // `frame_opts` and `mel_opts` become nested dicts, scalar fields are stored
 // under their field names, and `device` is stored as a torch.device object
 // obtained from the Python `torch` module.
 py::dict AsDict(const FbankOptions &opts) {
  py::dict dict;
  dict["frame_opts"] = AsDict(opts.frame_opts);
  dict["mel_opts"] = AsDict(opts.mel_opts);
  AS_DICT(use_energy);
  AS_DICT(energy_floor);
  AS_DICT(raw_energy);
  AS_DICT(htk_compat);
  AS_DICT(use_log_fbank);
  AS_DICT(use_power);
  auto torch_device = py::module_::import("torch").attr("device");
  dict["device"] = torch_device(opts.device.str());
  return dict;
 }
 // Builds a WhisperFbankOptions from a Python dict.
 //
 // Starts from a default-constructed WhisperFbankOptions; the keys
 // "frame_opts", "num_mels" and "device" are each optional.
 WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict) {
  WhisperFbankOptions opts;
  if (dict.contains("frame_opts")) {
    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  FROM_DICT(int_, num_mels);
  // The device value is converted to a string and parsed by torch::Device.
  if (dict.contains("device")) {
    opts.device = torch::Device(std::string(py::str(dict["device"])));
  }
  return opts;
 }
 // Converts `opts` into a Python dict with the keys "frame_opts" (nested
 // dict), "num_mels" and "device" (a torch.device object obtained from the
 // Python `torch` module).
 py::dict AsDict(const WhisperFbankOptions &opts) {
  py::dict dict;
  dict["frame_opts"] = AsDict(opts.frame_opts);
  AS_DICT(num_mels);
  auto torch_device = py::module_::import("torch").attr("device");
  dict["device"] = torch_device(opts.device.str());
  return dict;
 }
 // Builds a MfccOptions from a Python dict.
 //
 // Starts from a default-constructed MfccOptions; every key is optional and
 // a missing key leaves the corresponding default in place.
 MfccOptions MfccOptionsFromDict(py::dict dict) {
  MfccOptions opts;
  // Nested option groups are themselves stored as dicts.
  if (dict.contains("frame_opts")) {
    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  if (dict.contains("mel_opts")) {
    opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
  }
  FROM_DICT(int_, num_ceps);
  FROM_DICT(bool_, use_energy);
  FROM_DICT(float_, energy_floor);
  FROM_DICT(bool_, raw_energy);
  FROM_DICT(float_, cepstral_lifter);
  FROM_DICT(bool_, htk_compat);
  // The device value is converted to a string and parsed by torch::Device.
  if (dict.contains("device")) {
    opts.device = torch::Device(std::string(py::str(dict["device"])));
  }
  return opts;
 }
 // Converts `opts` into a Python dict.
 //
 // `frame_opts` and `mel_opts` become nested dicts, scalar fields are stored
 // under their field names, and `device` is stored as a torch.device object
 // obtained from the Python `torch` module.
 py::dict AsDict(const MfccOptions &opts) {
  py::dict dict;
  dict["frame_opts"] = AsDict(opts.frame_opts);
  dict["mel_opts"] = AsDict(opts.mel_opts);
  AS_DICT(num_ceps);
  AS_DICT(use_energy);
  AS_DICT(energy_floor);
  AS_DICT(raw_energy);
  AS_DICT(cepstral_lifter);
  AS_DICT(htk_compat);
  auto torch_device = py::module_::import("torch").attr("device");
  dict["device"] = torch_device(opts.device.str());
  return dict;
 }
 // Builds a SpectrogramOptions from a Python dict.
 //
 // Starts from a default-constructed SpectrogramOptions; the keys
 // "frame_opts", "energy_floor", "raw_energy" and "device" are each optional.
 // `return_raw_fft` is not read from the dict (its line is commented out).
 SpectrogramOptions SpectrogramOptionsFromDict(py::dict dict) {
  SpectrogramOptions opts;
  if (dict.contains("frame_opts")) {
    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  FROM_DICT(float_, energy_floor);
  FROM_DICT(bool_, raw_energy);
  // FROM_DICT(bool_, return_raw_fft);
  // The device value is converted to a string and parsed by torch::Device.
  if (dict.contains("device")) {
    opts.device = torch::Device(std::string(py::str(dict["device"])));
  }
  return opts;
 }
 // Converts `opts` into a Python dict with the keys "frame_opts" (nested
 // dict), "energy_floor", "raw_energy" and "device" (a torch.device object
 // obtained from the Python `torch` module). `return_raw_fft` is not written.
 py::dict AsDict(const SpectrogramOptions &opts) {
  py::dict dict;
  dict["frame_opts"] = AsDict(opts.frame_opts);
  AS_DICT(energy_floor);
  AS_DICT(raw_energy);
  auto torch_device = py::module_::import("torch").attr("device");
  dict["device"] = torch_device(opts.device.str());
  return dict;
 }
 // Builds a PlpOptions from a Python dict.
 //
 // Starts from a default-constructed PlpOptions; every key is optional and
 // a missing key leaves the corresponding default in place.
 PlpOptions PlpOptionsFromDict(py::dict dict) {
  PlpOptions opts;
  // Nested option groups are themselves stored as dicts.
  if (dict.contains("frame_opts")) {
    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
  }
  if (dict.contains("mel_opts")) {
    opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
  }
  FROM_DICT(int_, lpc_order);
  FROM_DICT(int_, num_ceps);
  FROM_DICT(bool_, use_energy);
  FROM_DICT(float_, energy_floor);
  FROM_DICT(bool_, raw_energy);
  FROM_DICT(float_, compress_factor);
  // NOTE(review): read as py::int_ here, while MfccOptionsFromDict reads its
  // cepstral_lifter as py::float_ -- confirm PlpOptions::cepstral_lifter is
  // an integral field.
  FROM_DICT(int_, cepstral_lifter);
  FROM_DICT(float_, cepstral_scale);
  FROM_DICT(bool_, htk_compat);
  // The device value is converted to a string and parsed by torch::Device.
  if (dict.contains("device")) {
    opts.device = torch::Device(std::string(py::str(dict["device"])));
  }
  return opts;
 }
 // Converts `opts` into a Python dict.
 //
 // `frame_opts` and `mel_opts` become nested dicts, scalar fields are stored
 // under their field names, and `device` is stored as a torch.device object
 // obtained from the Python `torch` module.
 py::dict AsDict(const PlpOptions &opts) {
  py::dict dict;
  dict["frame_opts"] = AsDict(opts.frame_opts);
  dict["mel_opts"] = AsDict(opts.mel_opts);
  AS_DICT(lpc_order);
  AS_DICT(num_ceps);
  AS_DICT(use_energy);
  AS_DICT(energy_floor);
  AS_DICT(raw_energy);
  AS_DICT(compress_factor);
  AS_DICT(cepstral_lifter);
  AS_DICT(cepstral_scale);
  AS_DICT(htk_compat);
  auto torch_device = py::module_::import("torch").attr("device");
  dict["device"] = torch_device(opts.device.str());
  return dict;
 }
 #undef FROM_DICT
 #undef AS_DICT
 }  // namespace kaldifeat
--- a/kaldifeat/python/csrc/utils.h
+++ b/kaldifeat/python/csrc/utils.h
@ -1,54 +0,0 @@
 // kaldifeat/python/csrc/utils.h
 //
 // Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
 #ifndef KALDIFEAT_PYTHON_CSRC_UTILS_H_
 #define KALDIFEAT_PYTHON_CSRC_UTILS_H_
 #include "kaldifeat/csrc/feature-fbank.h"
 #include "kaldifeat/csrc/feature-mfcc.h"
 #include "kaldifeat/csrc/feature-plp.h"
 #include "kaldifeat/csrc/feature-spectrogram.h"
 #include "kaldifeat/csrc/feature-window.h"
 #include "kaldifeat/csrc/mel-computations.h"
 #include "kaldifeat/csrc/whisper-fbank.h"
 #include "kaldifeat/python/csrc/kaldifeat.h"
 /*
 * This file declares the `from_dict` and `as_dict` helpers
 * for the various option structs in kaldifeat.
 *
 * `from_dict` does not require every field to be present:
 * any field missing from the dict keeps its default value.
 *
 * If the dict passed to `from_dict` is empty,
 * all fields keep their default values.
 */
 namespace kaldifeat {
 // Each options type has a pair of helpers: `XxxFromDict` builds the options
 // from a Python dict, and the matching `AsDict` overload converts the
 // options back into a Python dict.

 // Frame extraction options.
 FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
 py::dict AsDict(const FrameExtractionOptions &opts);
 // Mel filter bank options.
 MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
 py::dict AsDict(const MelBanksOptions &opts);
 // Fbank options.
 FbankOptions FbankOptionsFromDict(py::dict dict);
 py::dict AsDict(const FbankOptions &opts);
 // Whisper fbank options.
 WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
 py::dict AsDict(const WhisperFbankOptions &opts);
 // MFCC options.
 MfccOptions MfccOptionsFromDict(py::dict dict);
 py::dict AsDict(const MfccOptions &opts);
 // Spectrogram options.
 SpectrogramOptions SpectrogramOptionsFromDict(py::dict dict);
 py::dict AsDict(const SpectrogramOptions &opts);
 // PLP options.
 PlpOptions PlpOptionsFromDict(py::dict dict);
 py::dict AsDict(const PlpOptions &opts);
 }  // namespace kaldifeat
 #endif  // KALDIFEAT_PYTHON_CSRC_UTILS_H_
--- a/Show More
+++ b/Show More
		`@ -1 +0,0 @@`
			<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="94" height="20" role="img" aria-label="cuda: >= 10.1"><title>cuda: >= 10.1</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="94" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="37" height="20" fill="#555"/><rect x="37" width="57" height="20" fill="#fe7d37"/><rect width="94" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="195" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="270">cuda</text><text x="195" y="140" transform="scale(.1)" fill="#fff" textLength="270">cuda</text><text aria-hidden="true" x="645" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= 10.1</text><text x="645" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= 10.1</text></g></svg>
		`@ -1 +0,0 @@`
			`../../../../kaldifeat/python/tests/test_fbank_options.py`
		`@ -1,3 +0,0 @@`
			`kaldifeat.OnlineFbank`
			`=====================`
		`@ -1 +0,0 @@`
			`exclude_files=whisper-mel-bank.h,whisper-v3-mel-bank.h`