@@ -268,67 +272,15 @@ See .
See .
+## sherpa
+
+[sherpa](https://github.com/k2-fsa/sherpa) uses kaldifeat for streaming feature
+extraction.
+
+See
+
# Installation
-## From conda (Only for Linux + CUDA)
-
-Supported versions of Python, PyTorch, and CUDA toolkit are listed below:
-
-[](/doc/source/images/python-3.6_3.7_3.8-blue.svg)
-[](/doc/source/images/pytorch-1.6.0_1.7.0_1.7.1_1.8.0_1.8.1_1.9.0-green.svg)
-[](/doc/source/images/cuda-10.1_10.2_11.0_11.1-orange.svg)
-
-```bash
-conda install -c kaldifeat -c pytorch -c conda-forge kaldifeat python=3.8 cudatoolkit=11.1 pytorch=1.8.1
-```
-
-You can select the supported Python version, CUDA toolkit version and PyTorch version as you wish.
-
-**Note:** If you want a CPU only version or want to install `kaldifeat` on other operating systems,
-e.g., macOS, please use `pip install` or compile `kaldifeat` from source.
-
-
-## From PyPi with pip
-
-You need to install PyTorch and CMake first.
-CMake 3.11 is known to work. Other CMake versions may also work.
-PyTorch 1.6.0 and above are known to work. Other PyTorch versions may also work.
-
-```bash
-pip install -v kaldifeat
-```
-
-## From source
-
-The following are the commands to compile `kaldifeat` from source.
-We assume that you have installed `CMake` and PyTorch.
-CMake 3.11 is known to work. Other CMake versions may also work.
-PyTorch 1.6.0 and above are known to work. Other PyTorch versions may also work.
-
-```bash
-mkdir /some/path
-git clone https://github.com/csukuangfj/kaldifeat.git
-cd kaldifeat
-python setup.py install
-```
-
-To test whether `kaldifeat` was installed successfully, you can run:
-```bash
-python3 -c "import kaldifeat; print(kaldifeat.__version__)"
-```
-
-## FAQs
-
-### How to install a CUDA version
-
-There are two approaches:
-
- - (1) Install using `conda`. It always installs a CUDA version of kaldifeat.
- - (2) Install a CUDA version of PyTorch and then install kaldifeat from source
- or use `pip install kaldifeat`.
-
-
-### How to install a CPU-only version
-
-You have to first install a CPU-only version of PyTorch and then install kaldifeat
-from source or use `pip install kaldifeat`.
+Refer to
+
+for installation.
diff --git a/cmake/cmake_extension.py b/cmake/cmake_extension.py
index 8bd21ca..0d14815 100644
--- a/cmake/cmake_extension.py
+++ b/cmake/cmake_extension.py
@@ -128,3 +128,11 @@ class BuildExtension(build_ext):
for so in lib_so:
print(f"Copying {so} to {self.build_lib}/")
shutil.copy(f"{so}", f"{self.build_lib}/")
+
+ print(
+ f"Copying {kaldifeat_dir}/kaldifeat/python/kaldifeat/torch_version.py to {self.build_lib}/kaldifeat" # noqa
+ )
+ shutil.copy(
+ f"{kaldifeat_dir}/kaldifeat/python/kaldifeat/torch_version.py",
+ f"{self.build_lib}/kaldifeat",
+ )
diff --git a/cmake/pybind11.cmake b/cmake/pybind11.cmake
index 4cad4e8..aa99e6a 100644
--- a/cmake/pybind11.cmake
+++ b/cmake/pybind11.cmake
@@ -8,12 +8,9 @@ function(download_pybind11)
include(FetchContent)
- set(pybind11_URL "https://github.com/pybind/pybind11/archive/v2.6.0.tar.gz")
- set(pybind11_HASH "SHA256=90b705137b69ee3b5fc655eaca66d0dc9862ea1759226f7ccd3098425ae69571")
+ set(pybind11_URL "https://github.com/pybind/pybind11/archive/v2.9.2.tar.gz")
+ set(pybind11_HASH "SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1")
- set(double_quotes "\"")
- set(dollar "\$")
- set(semicolon "\;")
FetchContent_Declare(pybind11
URL ${pybind11_URL}
URL_HASH ${pybind11_HASH}
diff --git a/doc/source/_static/.gitkeep b/doc/source/_static/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/doc/source/code/test_fbank.py b/doc/source/code/test_fbank.py
deleted file mode 100755
index 0f39a1c..0000000
--- a/doc/source/code/test_fbank.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang)
-
-import numpy as np
-import soundfile as sf
-import torch
-
-import kaldifeat
-
-
-def read_wave(filename) -> torch.Tensor:
- """Read a wave file and return it as a 1-D tensor.
-
- Note:
- You don't need to scale it to [-32768, 32767].
- We use scaling here to follow the approach in Kaldi.
-
- Args:
- filename:
- Filename of a sound file.
- Returns:
- Return a 1-D tensor containing audio samples.
- """
- with sf.SoundFile(filename) as sf_desc:
- sampling_rate = sf_desc.samplerate
- assert sampling_rate == 16000
- data = sf_desc.read(dtype=np.float32, always_2d=False)
- data *= 32768
- return torch.from_numpy(data)
-
-
-def test_fbank():
- device = torch.device("cpu")
- if torch.cuda.is_available():
- device = torch.device("cuda", 0)
-
- wave0 = read_wave("test_data/test.wav")
- wave1 = read_wave("test_data/test2.wav")
-
- wave0 = wave0.to(device)
- wave1 = wave1.to(device)
-
- opts = kaldifeat.FbankOptions()
- opts.frame_opts.dither = 0
- opts.device = device
-
- fbank = kaldifeat.Fbank(opts)
-
- # We can compute fbank features in batches
- features = fbank([wave0, wave1])
- assert isinstance(features, list), f"{type(features)}"
- assert len(features) == 2
-
- # We can also compute fbank features for a single wave
- features0 = fbank(wave0)
- features1 = fbank(wave1)
-
- assert torch.allclose(features[0], features0)
- assert torch.allclose(features[1], features1)
-
- # To compute fbank features for only a specified frame
- audio_frames = fbank.convert_samples_to_frames(wave0)
- feature_frame_1 = fbank.compute(audio_frames[1])
- feature_frame_10 = fbank.compute(audio_frames[10])
-
- assert torch.allclose(features0[1], feature_frame_1)
- assert torch.allclose(features0[10], feature_frame_10)
-
-
-if __name__ == "__main__":
- test_fbank()
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 2ec9ca6..fef6d6f 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -59,7 +59,7 @@ templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
+exclude_patterns = ["images/*.md"]
source_suffix = {
".rst": "restructuredtext",
@@ -102,3 +102,35 @@ html_theme_options = {
"prev_next_buttons_location": "bottom",
"style_external_links": True,
}
+
+rst_epilog = """
+.. _kaldifeat: https://github.com/csukuangfj/kaldifeat
+.. _Kaldi: https://github.com/kaldi-asr/kaldi
+.. _PyTorch: https://pytorch.org/
+.. _kaldifeat.Fbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L10
+.. _kaldifeat.Mfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L10
+.. _kaldifeat.Plp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L10
+.. _kaldifeat.Spectrogram: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/spectrogram.py#L9
+.. _kaldifeat.OnlineFbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L16
+.. _kaldifeat.OnlineMfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L16
+.. _kaldifeat.OnlinePlp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L16
+.. _compute-fbank-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-fbank-feats.cc
+.. _compute-mfcc-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-mfcc-feats.cc
+.. _compute-plp-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-plp-feats.cc
+.. _compute-spectrogram-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-spectrogram-feats.cc
+.. _kaldi::OnlineFbank: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L160
+.. _kaldi::OnlineMfcc: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L158
+.. _kaldi::OnlinePlp: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L159
+.. _kaldifeat.FbankOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-fbank.h#L19
+.. _kaldi::FbankOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.h#L41
+.. _kaldifeat.MfccOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-mfcc.h#L22
+.. _kaldi::MfccOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-mfcc.h#L38
+.. _kaldifeat.PlpOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-plp.h#L24
+.. _kaldi::PlpOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-plp.h#L42
+.. _kaldifeat.SpectrogramOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-spectrogram.h#L18
+.. _kaldi::SpectrogramOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-spectrogram.h#L38
+.. _kaldifeat.FrameExtractionOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-window.h#L30
+.. _kaldi::FrameExtractionOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.h#L35
+.. _kaldifeat.MelBanksOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/mel-computations.h#L17
+.. _kaldi::MelBanksOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/mel-computations.h#L43
+"""
diff --git a/doc/source/images/README.md b/doc/source/images/README.md
new file mode 100644
index 0000000..542998b
--- /dev/null
+++ b/doc/source/images/README.md
@@ -0,0 +1,8 @@
+## File description
+
+ is used to create the following files:
+
+- ./os-green.svg
+- ./python_ge_3.6-blue.svg
+- ./cuda_ge_10.1-orange.svg
+- ./pytorch_ge_1.5.0-green.svg
diff --git a/doc/source/images/os-green.svg b/doc/source/images/os-green.svg
new file mode 100644
index 0000000..b78017a
--- /dev/null
+++ b/doc/source/images/os-green.svg
@@ -0,0 +1 @@
+
diff --git a/doc/source/images/os.svg b/doc/source/images/os.svg
deleted file mode 100644
index 314bf44..0000000
--- a/doc/source/images/os.svg
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 436e1b8..caa50b5 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -6,19 +6,11 @@
kaldifeat
=========
-`kaldifeat `_ implements
-feature extraction algorithms **compatible** with kaldi using PyTorch, supporting CUDA
-as well as autograd.
-
-Currently, only fbank features are supported.
-It can produce the same feature output as ``compute-fbank-feats`` (from kaldi)
-when given the same options.
-
-
.. toctree::
:maxdepth: 2
- :caption: Contents:
+ :caption: Contents
+ intro
installation
- usage
+ usage/index
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index 9e4bfc8..5baa217 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -1,19 +1,38 @@
Installation
============
+ - |os_types|
+ - |python_versions|
+ - |pytorch_versions|
+ - |cuda_versions|
+
+.. caution::
+
+ `kaldifeat`_ depends on `PyTorch`_. `PyTorch`_ >= 1.5.0 is known to work.
+
+ Please first install `PyTorch`_ before you install `kaldifeat`_.
+
+.. hint::
+
+ To install a CPU version of `kaldifeat`_, please install a CPU version
+ of `PyTorch`_.
+
+ To install a CUDA version of `kaldifeat`_, please install a CUDA version
+ of `PyTorch`_. CUDA >= 10.1 is known to work.
+
.. _from source:
Install kaldifeat from source
-----------------------------
-You have to install ``cmake`` and ``PyTorch`` first.
+You have to install ``cmake`` and `PyTorch`_ first.
- ``cmake`` 3.11 is known to work. Other CMake versions may also work.
- - ``PyTorch`` 1.8.1 is known to work. Other PyTorch versions may also work.
+ - `PyTorch`_ >= 1.5.0 is known to work. Other PyTorch versions may also work.
- Python >= 3.6
-The commands to install ``kaldifeat`` from source are:
+The commands to install `kaldifeat`_ from source are:
.. code-block:: bash
@@ -21,7 +40,7 @@ The commands to install ``kaldifeat`` from source are:
cd kaldifeat
python3 setup.py install
-To test that you have installed ``kaldifeat`` successfully, please run:
+To test that you have installed `kaldifeat`_ successfully, please run:
.. code-block:: bash
@@ -29,26 +48,120 @@ To test that you have installed ``kaldifeat`` successfully, please run:
It should print the version, e.g., ``1.0``.
+.. _from PyPI:
+
Install kaldifeat from PyPI
---------------------------
-The pre-built ``kaldifeat`` hosted on PyPI uses PyTorch 1.8.1.
-If you install ``kaldifeat`` using pip, it will replace your locally
-installed PyTorch automatically with PyTorch 1.8.1.
-
-If you don't want this happen, please `Install kaldifeat from source`_.
-
-The command to install ``kaldifeat`` from PyPI is:
+The command to install `kaldifeat`_ from PyPI is:
.. code-block:: bash
- pip install kaldifeat
+ pip install --verbose kaldifeat
-
-To test that you have installed ``kaldifeat`` successfully, please run:
+To test that you have installed `kaldifeat`_ successfully, please run:
.. code-block:: bash
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
It should print the version, e.g., ``1.0``.
+
+Install kaldifeat from conda (Only for Linux)
+---------------------------------------------
+
+.. hint::
+
+ Installation using ``conda`` supports only Linux. For macOS and Windows,
+ please use either :ref:`from source` or :ref:`from PyPI`.
+
+The command to install `kaldifeat`_ using ``conda`` is:
+
+.. code-block:: bash
+
+ conda install -c kaldifeat -c pytorch -c conda-forge kaldifeat python=3.8 cudatoolkit=11.1 pytorch=1.8.1
+
+You can select the supported Python version, CUDA toolkit version and `PyTorch`_ version as you wish.
+
+To install a CPU version of `kaldifeat`_, use:
+
+.. code-block:: bash
+
+ conda install -c kaldifeat -c pytorch cpuonly kaldifeat python=3.8 pytorch=1.8.1
+
+.. caution::
+
+ If you encounter issues about missing GLIBC after installing `kaldifeat`_
+ with ``conda``, please consider :ref:`from source` or :ref:`from PyPI`.
+ The reason is that the package was built using Ubuntu 18.04 and your system's
+ GLIBC is older.
+
+
+.. |os_types| image:: ./images/os-green.svg
+ :alt: Supported operating systems
+
+.. |python_versions| image:: ./images/python_ge_3.6-blue.svg
+ :alt: Supported python versions
+
+.. |cuda_versions| image:: ./images/cuda_ge_10.1-orange.svg
+ :alt: Supported cuda versions
+
+.. |pytorch_versions| image:: ./images/pytorch_ge_1.5.0-green.svg
+ :alt: Supported pytorch versions
+
+To test that you have installed `kaldifeat`_ successfully, please run:
+
+.. code-block:: bash
+
+ python3 -c "import kaldifeat; print(kaldifeat.__version__)"
+
+It should print the version, e.g., ``1.0``.
+
+FAQs
+----
+
+How to install a CUDA version of kaldifeat
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You need to first install a CUDA version of `PyTorch`_ and then install `kaldifeat`_.
+
+.. note::
+
+ You can use a CUDA version of `kaldifeat`_ on machines with no GPUs.
+
+How to install a CPU version of kaldifeat
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You need to first install a CPU version of `PyTorch`_ and then install `kaldifeat`_.
+
+How to fix ``Caffe2: Cannot find cuDNN library``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block::
+
+ Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN
+ libraries. Please set the proper cuDNN prefixes and / or install cuDNN.
+
+You will have such an error when you want to install a CUDA version of `kaldifeat`_
+by ``pip install kaldifeat`` or from source.
+
+You need to first install cuDNN. Assume you have installed cuDNN to the
+path ``/path/to/cudnn``. You can fix the error by using **one** of the following
+commands.
+
+(1) Fix for installation using ``pip install``
+
+.. code-block:: bash
+
+ export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
+ pip install --verbose kaldifeat
+
+(2) Fix for installation from source
+
+.. code-block:: bash
+
+ mkdir /some/path
+ git clone https://github.com/csukuangfj/kaldifeat.git
+ cd kaldifeat
+ export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
+ python setup.py install
diff --git a/doc/source/intro.rst b/doc/source/intro.rst
new file mode 100644
index 0000000..6e66c36
--- /dev/null
+++ b/doc/source/intro.rst
@@ -0,0 +1,103 @@
+Introduction
+============
+
+`kaldifeat`_ implements
+speech feature extraction algorithms **compatible** with `Kaldi`_ using `PyTorch`_,
+supporting CUDA as well as autograd.
+
+`kaldifeat`_ has the following features:
+
+ - Fully compatible with `Kaldi`_
+
+ .. note::
+
+ The underlying C++ code is copied & modified from `Kaldi`_ directly.
+ It is rewritten with `PyTorch`_ C++ APIs.
+
+ - Provide not only ``C++ APIs`` but also ``Python APIs``
+
+ .. note::
+
+ You can access `kaldifeat`_ from ``Python``.
+
+ - Support autograd
+ - Support ``CUDA`` and ``CPU``
+
+ .. note::
+
+ You can use CUDA for feature extraction.
+
+ - Support ``online`` (i.e., ``streaming``) and ``offline`` (i.e., ``non-streaming``)
+ feature extraction
+ - Support chunk-based processing
+
+ .. note::
+
+ This is especially useful if you want to process audio that is several
+ hours long, which may cause OOM if you send it for computation at once.
+ With chunk-based processing, you can process audio of arbitrary length.
+
+ - Support batch processing
+
+ .. note::
+
+ With `kaldifeat`_ you can extract features for a batch of audio files.
+
+
+.. see https://sublime-and-sphinx-guide.readthedocs.io/en/latest/tables.html
+
+Currently implemented speech features and their counterparts in `Kaldi`_ are
+listed in the following table.
+
+.. list-table:: Supported speech features
+ :widths: 50 50
+ :header-rows: 1
+
+ * - Supported speech features
+ - Counterpart in `Kaldi`_
+ * - `kaldifeat.Fbank`_
+ - `compute-fbank-feats`_
+ * - `kaldifeat.Mfcc`_
+ - `compute-mfcc-feats`_
+ * - `kaldifeat.Plp`_
+ - `compute-plp-feats`_
+ * - `kaldifeat.Spectrogram`_
+ - `compute-spectrogram-feats`_
+ * - `kaldifeat.OnlineFbank`_
+ - `kaldi::OnlineFbank`_
+ * - `kaldifeat.OnlineMfcc`_
+ - `kaldi::OnlineMfcc`_
+ * - `kaldifeat.OnlinePlp`_
+ - `kaldi::OnlinePlp`_
+
+Each feature computer needs an option. The following table lists the options
+for each computer and the corresponding options in `Kaldi`_.
+
+.. hint::
+
+ Note that we reuse the parameter names from `Kaldi`_.
+
+ Also, both online feature computers and offline feature computers share the
+ same option.
+
+.. list-table:: Feature computer options
+ :widths: 50 50
+ :header-rows: 1
+
+ * - Options in `kaldifeat`_
+ - Corresponding options in `Kaldi`_
+ * - `kaldifeat.FbankOptions`_
+ - `kaldi::FbankOptions`_
+ * - `kaldifeat.MfccOptions`_
+ - `kaldi::MfccOptions`_
+ * - `kaldifeat.PlpOptions`_
+ - `kaldi::PlpOptions`_
+ * - `kaldifeat.SpectrogramOptions`_
+ - `kaldi::SpectrogramOptions`_
+ * - `kaldifeat.FrameExtractionOptions`_
+ - `kaldi::FrameExtractionOptions`_
+ * - `kaldifeat.MelBanksOptions`_
+ - `kaldi::MelBanksOptions`_
+
+Read more to learn how to install `kaldifeat`_ and how to use each feature
+computer.
diff --git a/doc/source/usage.rst b/doc/source/usage.rst
deleted file mode 100644
index dd6a770..0000000
--- a/doc/source/usage.rst
+++ /dev/null
@@ -1,212 +0,0 @@
-Usage
-=====
-
-Let us first see the help message of kaldi's ``compute-fbank-feats``:
-
-.. code-block:: bash
-
- $ compute-fbank-feats
-
- Create Mel-filter bank (FBANK) feature files.
- Usage: compute-fbank-feats [options...]
-
- Options:
- --allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
- --allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
- --blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
- --channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
- --debug-mel : Print out debugging information for mel bin computation (bool, default = false)
- --dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
- --energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
- --frame-length : Frame length in milliseconds (float, default = 25)
- --frame-shift : Frame shift in milliseconds (float, default = 10)
- --high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
- --htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
- --low-freq : Low cutoff frequency for mel bins (float, default = 20)
- --max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
- --min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
- --num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
- --output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
- --preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
- --raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
- --remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
- --round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
- --sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
- --snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
- --subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
- --use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
- --use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
- --use-power : If true, use power, else use magnitude. (bool, default = true)
- --utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
- --vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
- --vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
- --vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
- --vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
- --window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
- --write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
-
- Standard options:
- --config : Configuration file to read (this option may be repeated) (string, default = "")
- --help : Print out usage message (bool, default = false)
- --print-args : Print the command line arguments (to stderr) (bool, default = true)
- --verbose : Verbose level (higher->more logging) (int, default = 0)
-
-FbankOptions
-------------
-
-``kaldifeat`` reuses the same options from kaldi's ``compute-fbank-feats``.
-
-The following shows the default values of ``kaldifeat.FbankOptions``:
-
-.. code-block:: python
-
- >>> import kaldifeat
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts)
- frame_opts:
- samp_freq: 16000
- frame_shift_ms: 10
- frame_length_ms: 25
- dither: 1
- preemph_coeff: 0.97
- remove_dc_offset: 1
- window_type: povey
- round_to_power_of_two: 1
- blackman_coeff: 0.42
- snip_edges: 1
-
-
- mel_opts:
- num_bins: 23
- low_freq: 20
- high_freq: 0
- vtln_low: 100
- vtln_high: -500
- debug_mel: 0
- htk_mode: 0
-
- use_energy: 0
- energy_floor: 0
- raw_energy: 1
- htk_compat: 0
- use_log_fbank: 1
- use_power: 1
- device: cpu
-
-It consists of three parts:
-
- - ``frame_opts``
-
- Options in this part are accessed by ``frame_opts.xxx``. That is, to access
- the sample rate, you use:
-
- .. code-block:: python
-
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts.frame_opts.samp_freq)
- 16000.0
-
- - ``mel_opts``
-
- Options in this part are accessed by ``mel_opts.xxx``. That is, to access
- the number of mel bins, you use:
-
- .. code-block:: python
-
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts.mel_opts.num_bins)
- 23
-
- - fbank related
-
- Options in this part are accessed directly. That is, to access the device
- field, you use:
-
- .. code-block::
-
- >>> print(fbank_opts.device)
- cpu
- >>> fbank_opts.device = 'cuda:0'
- >>> print(fbank_opts.device)
- cuda:0
- >>> import torch
- >>> fbank_opts.device = torch.device('cuda', 0)
- >>> print(fbank_opts.device)
- cuda:0
-
-
-
-To change the sample rate to 8000, you can use:
-
-.. code-block:: python
-
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts.frame_opts.samp_freq)
- 16000.0
- >>> fbank_opts.frame_opts.samp_freq = 8000
- >>> print(fbank_opts.frame_opts.samp_freq)
- 8000.0
-
-To change ``snip_edges`` to ``False``, you can use:
-
-.. code-block:: python
-
- >>> fbank_opts.frame_opts.snip_edges = False
- >>> print(fbank_opts.frame_opts.snip_edges)
- False
-
-To change number of mel bins to 80, you can use:
-
-.. code-block:: python
-
- >>> print(fbank_opts.mel_opts.num_bins)
- 23
- >>> fbank_opts.mel_opts.num_bins = 80
- >>> print(fbank_opts.mel_opts.num_bins)
- 80
-
-To change the device to ``cuda``, you can use:
-
-
-Fbank
------
-
-The following shows how to use ``kaldifeat.Fbank`` to compute
-the fbank features of sound files.
-
-First, let us generate two sound files using ``sox``:
-
-.. code-block:: bash
-
- # generate a wav of two seconds, containing a sine-wave
- # swept from 300 Hz to 3300 Hz
- sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
-
- # another sound file with 0.5 seconds
- sox -n -r 16000 -b 16 test2.wav synth 0.5 sine 300-3300
-
-.. hint::
-
- You can find the above two files by visiting the following two links:
-
- - `test.wav `_
- - `test2.wav `_
-
-The `following code `_
-shows the usage of ``kaldifeat.Fbank``.
-
-It shows:
-
- - How to read a sound file. Note that audio samples are scaled to the range [-32768, 32768].
- The intention is to produce the same output as kaldi. You don't need to scale it if
- you don't care about the compatibility with kaldi
-
- - ``kaldifeat.Fbank`` supports CUDA as well as CPU
-
- - ``kaldifeat.Fbank`` supports processing sound file in a batch as well as accepting
- a single sound file
-
-
-.. literalinclude:: ./code/test_fbank.py
- :caption: Demo of ``kaldifeat.Fbank``
- :language: python
diff --git a/doc/source/usage/code/compute-fbank-feats-help.txt b/doc/source/usage/code/compute-fbank-feats-help.txt
new file mode 100644
index 0000000..3922636
--- /dev/null
+++ b/doc/source/usage/code/compute-fbank-feats-help.txt
@@ -0,0 +1,46 @@
+compute-fbank-feats
+
+Create Mel-filter bank (FBANK) feature files.
+Usage: compute-fbank-feats [options...]
+
+Options:
+ --allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
+ --allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
+ --blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
+ --channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
+ --debug-mel : Print out debugging information for mel bin computation (bool, default = false)
+ --dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
+ --energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
+ --frame-length : Frame length in milliseconds (float, default = 25)
+ --frame-shift : Frame shift in milliseconds (float, default = 10)
+ --high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
+ --htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
+ --low-freq : Low cutoff frequency for mel bins (float, default = 20)
+ --max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
+ --min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
+ --num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
+ --output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
+ --preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
+ --raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
+ --remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
+ --round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
+ --sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
+ --snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
+ --subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
+ --use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
+ --use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
+ --use-power : If true, use power, else use magnitude. (bool, default = true)
+ --utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
+ --vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
+ --vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
+ --vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
+ --vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
+ --window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
+ --write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
+
+Standard options:
+ --config : Configuration file to read (this option may be repeated) (string, default = "")
+ --help : Print out usage message (bool, default = false)
+ --print-args : Print the command line arguments (to stderr) (bool, default = true)
+ --verbose : Verbose level (higher->more logging) (int, default = 0)
+
diff --git a/doc/source/usage/code/fbank_options-1.txt b/doc/source/usage/code/fbank_options-1.txt
new file mode 100644
index 0000000..7e0470a
--- /dev/null
+++ b/doc/source/usage/code/fbank_options-1.txt
@@ -0,0 +1,65 @@
+$ python3
+Python 3.8.0 (default, Oct 28 2019, 16:14:01)
+[GCC 8.3.0] on linux
+Type "help", "copyright", "credits" or "license" for more information.
+>>> import kaldifeat
+>>> opts = kaldifeat.FbankOptions()
+>>> print(opts)
+frame_opts:
+samp_freq: 16000
+frame_shift_ms: 10
+frame_length_ms: 25
+dither: 1
+preemph_coeff: 0.97
+remove_dc_offset: 1
+window_type: povey
+round_to_power_of_two: 1
+blackman_coeff: 0.42
+snip_edges: 1
+max_feature_vectors: -1
+
+
+mel_opts:
+num_bins: 23
+low_freq: 20
+high_freq: 0
+vtln_low: 100
+vtln_high: -500
+debug_mel: 0
+htk_mode: 0
+
+use_energy: 0
+energy_floor: 0
+raw_energy: 1
+htk_compat: 0
+use_log_fbank: 1
+use_power: 1
+device: cpu
+
+>>> print(opts.dither)
+Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+AttributeError: '_kaldifeat.FbankOptions' object has no attribute 'dither'
+>>>
+>>> print(opts.frame_opts.dither)
+1.0
+>>> opts.frame_opts.dither = 0 # disable dither
+>>> print(opts.frame_opts.dither)
+0.0
+>>> import torch
+>>> print(opts.device)
+cpu
+>>> opts.device = 'cuda:0'
+>>> print(opts.device)
+cuda:0
+>>> opts.device = torch.device('cuda', 1)
+>>> print(opts.device)
+cuda:1
+>>> opts.device = 'cpu'
+>>> print(opts.device)
+cpu
+>>> print(opts.mel_opts.num_bins)
+23
+>>> opts.mel_opts.num_bins = 80
+>>> print(opts.mel_opts.num_bins)
+80
diff --git a/doc/source/usage/code/test_fbank_options.py b/doc/source/usage/code/test_fbank_options.py
new file mode 120000
index 0000000..3bfe0fa
--- /dev/null
+++ b/doc/source/usage/code/test_fbank_options.py
@@ -0,0 +1 @@
+../../../../kaldifeat/python/tests/test_fbank_options.py
\ No newline at end of file
diff --git a/doc/source/usage/fbank.rst b/doc/source/usage/fbank.rst
new file mode 100644
index 0000000..e3f1351
--- /dev/null
+++ b/doc/source/usage/fbank.rst
@@ -0,0 +1,3 @@
+kaldifeat.Fbank
+===============
+
diff --git a/doc/source/usage/fbank_options.rst b/doc/source/usage/fbank_options.rst
new file mode 100644
index 0000000..d9adc2d
--- /dev/null
+++ b/doc/source/usage/fbank_options.rst
@@ -0,0 +1,52 @@
+kaldifeat.FbankOptions
+======================
+
+If you want to construct an instance of `kaldifeat.Fbank`_ or
+`kaldifeat.OnlineFbank`_, you have to provide an instance of
+`kaldifeat.FbankOptions`_.
+
+The following code shows how to construct an instance of `kaldifeat.FbankOptions`_.
+
+.. literalinclude:: ./code/fbank_options-1.txt
+ :caption: Usage of `kaldifeat.FbankOptions`_
+ :emphasize-lines: 6,8,22,37
+ :language: python
+
+Note that we reuse the same option names as `compute-fbank-feats`_ from `Kaldi`_:
+
+.. code-block:: bash
+
+ $ compute-fbank-feats --help
+
+
+.. literalinclude:: ./code/compute-fbank-feats-help.txt
+ :caption: Output of ``compute-fbank-feats --help``
+
+Please refer to the output of ``compute-fbank-feats --help`` for the meaning
+of each field of `kaldifeat.FbankOptions`_.
+
+One thing worth noting is that `kaldifeat.FbankOptions`_ has a field ``device``,
+which is an instance of ``torch.device``. You can assign it either a string, e.g.,
+``"cpu"`` or ``"cuda:0"``, or an instance of ``torch.device``, e.g., ``torch.device("cpu")`` or
+``torch.device("cuda", 1)``.
+
+.. hint::
+
+ You can use this field to control whether the feature computer
+ constructed from it performs computation on CPU or CUDA.
+
+.. caution::
+
+ If you use a CUDA device, make sure that you have installed a CUDA version
+ of `PyTorch`_.
+
+Example usage
+-------------
+
+The following code from
+`<https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank_options.py>`_
+demonstrates the usage of `kaldifeat.FbankOptions`_:
+
+.. literalinclude:: ./code/test_fbank_options.py
+ :caption: Example usage of `kaldifeat.FbankOptions`_
+ :language: python
diff --git a/doc/source/usage/index.rst b/doc/source/usage/index.rst
new file mode 100644
index 0000000..f40dcd5
--- /dev/null
+++ b/doc/source/usage/index.rst
@@ -0,0 +1,11 @@
+Usage
+=====
+
+This section describes how to use feature computers in `kaldifeat`_.
+
+.. toctree::
+ :maxdepth: 2
+
+ fbank_options
+ fbank
+ online_fbank
diff --git a/doc/source/usage/online_fbank.rst b/doc/source/usage/online_fbank.rst
new file mode 100644
index 0000000..557104d
--- /dev/null
+++ b/doc/source/usage/online_fbank.rst
@@ -0,0 +1,3 @@
+kaldifeat.OnlineFbank
+=====================
+
diff --git a/kaldifeat/csrc/CMakeLists.txt b/kaldifeat/csrc/CMakeLists.txt
index 39f2c1c..9900b96 100644
--- a/kaldifeat/csrc/CMakeLists.txt
+++ b/kaldifeat/csrc/CMakeLists.txt
@@ -31,9 +31,15 @@ function(kaldifeat_add_test source)
gtest_main
)
+ # NOTE: We set the working directory here so that
+ # it works also on windows. The reason is that
+ # the required DLLs are inside ${TORCH_DIR}/lib
+ # and they can be found by the exe if the current
+ # working directory is ${TORCH_DIR}\lib
add_test(NAME "Test.${name}"
COMMAND
$
+ WORKING_DIRECTORY ${TORCH_DIR}/lib
)
endfunction()
@@ -47,4 +53,5 @@ if(kaldifeat_BUILD_TESTS)
foreach(source IN LISTS test_srcs)
kaldifeat_add_test(${source})
endforeach()
+
endif()
diff --git a/kaldifeat/python/csrc/CMakeLists.txt b/kaldifeat/python/csrc/CMakeLists.txt
index c80637c..1403e6d 100644
--- a/kaldifeat/python/csrc/CMakeLists.txt
+++ b/kaldifeat/python/csrc/CMakeLists.txt
@@ -10,6 +10,17 @@ pybind11_add_module(_kaldifeat
online-feature.cc
utils.cc
)
+
+if(APPLE)
+ execute_process(
+ COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
+ )
+ message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
+ target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
+endif()
+
target_link_libraries(_kaldifeat PRIVATE kaldifeat_core)
if(UNIX AND NOT APPLE)
target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/libtorch_python.so)
diff --git a/kaldifeat/python/csrc/feature-fbank.cc b/kaldifeat/python/csrc/feature-fbank.cc
index dcc9b14..6e52f0c 100644
--- a/kaldifeat/python/csrc/feature-fbank.cc
+++ b/kaldifeat/python/csrc/feature-fbank.cc
@@ -53,7 +53,7 @@ static void PybindFbank(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
- py::arg("vtln_warp"))
+ py::arg("vtln_warp"), py::call_guard())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
diff --git a/kaldifeat/python/csrc/feature-mfcc.cc b/kaldifeat/python/csrc/feature-mfcc.cc
index 40d330e..fe893cb 100644
--- a/kaldifeat/python/csrc/feature-mfcc.cc
+++ b/kaldifeat/python/csrc/feature-mfcc.cc
@@ -53,7 +53,7 @@ static void PybindMfcc(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
- py::arg("vtln_warp"))
+ py::arg("vtln_warp"), py::call_guard())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
diff --git a/kaldifeat/python/csrc/feature-plp.cc b/kaldifeat/python/csrc/feature-plp.cc
index abc5595..364ef93 100644
--- a/kaldifeat/python/csrc/feature-plp.cc
+++ b/kaldifeat/python/csrc/feature-plp.cc
@@ -56,7 +56,7 @@ static void PybindPlp(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
- py::arg("vtln_warp"))
+ py::arg("vtln_warp"), py::call_guard())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
diff --git a/kaldifeat/python/csrc/feature-spectrogram.cc b/kaldifeat/python/csrc/feature-spectrogram.cc
index 62aa909..24b156b 100644
--- a/kaldifeat/python/csrc/feature-spectrogram.cc
+++ b/kaldifeat/python/csrc/feature-spectrogram.cc
@@ -53,7 +53,7 @@ static void PybindSpectrogram(py::module &m) {
.def("dim", &PyClass::Dim)
.def_property_readonly("options", &PyClass::GetOptions)
.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
- py::arg("vtln_warp"))
+ py::arg("vtln_warp"), py::call_guard())
.def(py::pickle(
[](const PyClass &self) -> py::dict {
return AsDict(self.GetOptions());
diff --git a/kaldifeat/python/csrc/online-feature.cc b/kaldifeat/python/csrc/online-feature.cc
index 13e4a4f..2d1dcd8 100644
--- a/kaldifeat/python/csrc/online-feature.cc
+++ b/kaldifeat/python/csrc/online-feature.cc
@@ -22,9 +22,11 @@ void PybindOnlineFeatureTpl(py::module &m, const std::string &class_name,
.def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
.def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
.def("get_frame", &PyClass::GetFrame, py::arg("frame"))
- .def("get_frames", &PyClass::GetFrames, py::arg("frames"))
+ .def("get_frames", &PyClass::GetFrames, py::arg("frames"),
+ py::call_guard())
.def("accept_waveform", &PyClass::AcceptWaveform,
- py::arg("sampling_rate"), py::arg("waveform"))
+ py::arg("sampling_rate"), py::arg("waveform"),
+ py::call_guard())
.def("input_finished", &PyClass::InputFinished);
}
diff --git a/kaldifeat/python/kaldifeat/__init__.py b/kaldifeat/python/kaldifeat/__init__.py
index ea39003..adf7d79 100644
--- a/kaldifeat/python/kaldifeat/__init__.py
+++ b/kaldifeat/python/kaldifeat/__init__.py
@@ -1,4 +1,13 @@
import torch
+
+from .torch_version import kaldifeat_torch_version
+
+if torch.__version__.split("+")[0] != kaldifeat_torch_version.split("+")[0]:
+ raise ImportError(
+ f"kaldifeat was built using PyTorch {kaldifeat_torch_version}\n"
+ f"But you are using PyTorch {torch.__version__} to run it"
+ )
+
from _kaldifeat import (
FbankOptions,
FrameExtractionOptions,
diff --git a/kaldifeat/python/kaldifeat/torch_version.py.in b/kaldifeat/python/kaldifeat/torch_version.py.in
new file mode 100644
index 0000000..e6365fa
--- /dev/null
+++ b/kaldifeat/python/kaldifeat/torch_version.py.in
@@ -0,0 +1,12 @@
+# Auto generated by the toplevel CMakeLists.txt.
+#
+# DO NOT EDIT.
+
+# The torch version used to build kaldifeat. It is compared (ignoring any
+# local suffix after "+", e.g. "+cu102") against the torch version used to
+# run kaldifeat. If they differ, `import kaldifeat` raises ImportError.
+#
+# Some example values are:
+# - 1.10.0+cu102
+# - 1.5.0+cpu
+kaldifeat_torch_version = "@TORCH_VERSION@"
diff --git a/kaldifeat/python/tests/Makefile b/kaldifeat/python/tests/Makefile
new file mode 100644
index 0000000..20f98ff
--- /dev/null
+++ b/kaldifeat/python/tests/Makefile
@@ -0,0 +1,13 @@
+
+.PHONY: test
+test:
+ python3 ./test_fbank.py
+ python3 ./test_fbank_options.py
+ python3 ./test_frame_extraction_options.py
+ python3 ./test_mel_bank_options.py
+ python3 ./test_mfcc.py
+ python3 ./test_mfcc_options.py
+ python3 ./test_plp.py
+ python3 ./test_plp_options.py
+ python3 ./test_spectrogram.py
+ python3 ./test_spectrogram_options.py
diff --git a/scripts/build_conda_cpu.sh b/scripts/build_conda_cpu.sh
new file mode 100755
index 0000000..3d2c47a
--- /dev/null
+++ b/scripts/build_conda_cpu.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+#
+# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The following environment variables are supposed to be set by users
+#
+# - KALDIFEAT_TORCH_VERSION
+# The PyTorch version. Example:
+#
+# export KALDIFEAT_TORCH_VERSION=1.7.1
+#
+# Defaults to 1.7.1 if not set.
+#
+# - KALDIFEAT_CONDA_TOKEN
+# If not set, auto upload to anaconda.org is disabled.
+#
+# Its value is from https://anaconda.org/kaldifeat/settings/access
+# (You need to login as user kaldifeat to see its value)
+#
+# - KALDIFEAT_BUILD_TYPE
+# If not set, defaults to Release.
+
+set -e
+export CONDA_BUILD=1
+
+cur_dir=$(cd $(dirname $BASH_SOURCE) && pwd)
+kaldifeat_dir=$(cd $cur_dir/.. && pwd)
+
+cd $kaldifeat_dir
+
+export KALDIFEAT_ROOT_DIR=$kaldifeat_dir
+echo "KALDIFEAT_ROOT_DIR: $KALDIFEAT_ROOT_DIR"
+
+KALDIFEAT_PYTHON_VERSION=$(python3 -c "import sys; print(sys.version[:3])")
+
+if [ -z $KALDIFEAT_TORCH_VERSION ]; then
+ echo "env var KALDIFEAT_TORCH_VERSION is not set, defaults to 1.7.1"
+ KALDIFEAT_TORCH_VERSION=1.7.1
+fi
+
+if [ -z $KALDIFEAT_BUILD_TYPE ]; then
+ echo "env var KALDIFEAT_BUILD_TYPE is not set, defaults to Release"
+ KALDIFEAT_BUILD_TYPE=Release
+fi
+
+export KALDIFEAT_IS_FOR_CONDA=1
+
+# Example value: 3.8
+export KALDIFEAT_PYTHON_VERSION
+
+# Example value: 1.7.1
+export KALDIFEAT_TORCH_VERSION
+
+export KALDIFEAT_BUILD_TYPE
+
+if [ ! -z $KALDIFEAT_IS_GITHUB_ACTIONS ]; then
+ export KALDIFEAT_IS_GITHUB_ACTIONS
+ conda remove -q pytorch
+ conda clean -q -a
+else
+ export KALDIFEAT_IS_GITHUB_ACTIONS=0
+fi
+
+if [ -z $KALDIFEAT_CONDA_TOKEN ]; then
+ echo "Auto upload to anaconda.org is disabled since KALDIFEAT_CONDA_TOKEN is not set"
+ conda build --no-test --no-anaconda-upload -c pytorch -c conda-forge ./scripts/conda-cpu/kaldifeat
+else
+ conda build --no-test -c pytorch -c conda-forge --token $KALDIFEAT_CONDA_TOKEN ./scripts/conda-cpu/kaldifeat
+fi
diff --git a/scripts/conda-cpu/cpuonly/meta.yaml b/scripts/conda-cpu/cpuonly/meta.yaml
new file mode 100644
index 0000000..33ec762
--- /dev/null
+++ b/scripts/conda-cpu/cpuonly/meta.yaml
@@ -0,0 +1,10 @@
+# this file is copied from
+# https://github.com/pytorch/builder/tree/master/conda/cpuonly
+package:
+ name: cpuonly
+ version: 1.0
+
+build:
+ track_features:
+ - cpuonly
+ noarch: generic
diff --git a/scripts/conda-cpu/kaldifeat/build.sh b/scripts/conda-cpu/kaldifeat/build.sh
new file mode 100644
index 0000000..6e24b9d
--- /dev/null
+++ b/scripts/conda-cpu/kaldifeat/build.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ex
+
+CONDA_ENV_DIR=$CONDA_PREFIX
+
+echo "KALDIFEAT_PYTHON_VERSION: $KALDIFEAT_PYTHON_VERSION"
+echo "KALDIFEAT_TORCH_VERSION: $KALDIFEAT_TORCH_VERSION"
+echo "KALDIFEAT_BUILD_TYPE: $KALDIFEAT_BUILD_TYPE"
+echo "KALDIFEAT_BUILD_VERSION: $KALDIFEAT_BUILD_VERSION"
+
+export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${KALDIFEAT_BUILD_TYPE}"
+export KALDIFEAT_MAKE_ARGS="-j1 VERBOSE=1"
+
+export LIBRARY_PATH="/usr/local/miniconda/envs/kaldifeat/lib":$LIBRARY_PATH
+export LD_LIBRARY_PATH="/usr/local/miniconda/envs/kaldifeat/lib":$LD_LIBRARY_PATH
+export DYLD_LIBRARY_PATH="/usr/local/miniconda/envs/kaldifeat/lib":$DYLD_LIBRARY_PATH
+
+python3 setup.py install --single-version-externally-managed --record=record.txt
diff --git a/scripts/conda-cpu/kaldifeat/meta.yaml b/scripts/conda-cpu/kaldifeat/meta.yaml
new file mode 100644
index 0000000..08fe5e4
--- /dev/null
+++ b/scripts/conda-cpu/kaldifeat/meta.yaml
@@ -0,0 +1,44 @@
+package:
+ name: kaldifeat
+ version: "1.16"
+
+source:
+ path: "{{ environ.get('KALDIFEAT_ROOT_DIR') }}"
+
+build:
+ number: 0
+ string: cpu_py{{ environ.get('KALDIFEAT_PYTHON_VERSION') }}_torch{{ environ.get('KALDIFEAT_TORCH_VERSION') }}
+ script_env:
+ - KALDIFEAT_IS_GITHUB_ACTIONS
+ - KALDIFEAT_TORCH_VERSION
+ - KALDIFEAT_PYTHON_VERSION
+ - KALDIFEAT_BUILD_TYPE
+ - KALDIFEAT_BUILD_VERSION
+ - KALDIFEAT_IS_FOR_CONDA
+
+requirements:
+ build:
+ - {{ compiler('c') }} # [win]
+ - {{ compiler('cxx') }} # [win]
+
+ host:
+ - cmake=3.18
+ - python
+ - pytorch={{ environ.get('KALDIFEAT_TORCH_VERSION') }}
+ - gcc_linux-64=7 # [linux]
+ - cpuonly
+ - numpy
+
+ run:
+ - python
+ - pytorch={{ environ.get('KALDIFEAT_TORCH_VERSION') }}
+ - numpy
+
+about:
+ home: https://github.com/csukuangfj/kaldifeat
+ license: Apache V2
+ license_file: LICENSE
+ summary: Kaldi-compatible feature extraction with PyTorch
+ description: |
+ Kaldi-compatible feature extraction with PyTorch,
+ supporting CUDA, batch processing, chunk processing, and autograd
diff --git a/scripts/conda/kaldifeat/build.sh b/scripts/conda/kaldifeat/build.sh
index 3897511..4539872 100644
--- a/scripts/conda/kaldifeat/build.sh
+++ b/scripts/conda/kaldifeat/build.sh
@@ -32,6 +32,6 @@ echo "gcc version: $($CC --version)"
echo "nvcc version: $(nvcc --version)"
export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${KALDIFEAT_BUILD_TYPE}"
-export KALDIFEAT_MAKE_ARGS="-j2"
+export KALDIFEAT_MAKE_ARGS="-j3"
python3 setup.py install --single-version-externally-managed --record=record.txt
diff --git a/scripts/conda/kaldifeat/meta.yaml b/scripts/conda/kaldifeat/meta.yaml
index 6a1b485..4979060 100644
--- a/scripts/conda/kaldifeat/meta.yaml
+++ b/scripts/conda/kaldifeat/meta.yaml
@@ -1,6 +1,6 @@
package:
name: kaldifeat
- version: "1.14"
+ version: "1.16"
source:
path: "{{ environ.get('KALDIFEAT_ROOT_DIR') }}"
diff --git a/scripts/github_actions/generate_build_matrix.py b/scripts/github_actions/generate_build_matrix.py
new file mode 100755
index 0000000..6b85131
--- /dev/null
+++ b/scripts/github_actions/generate_build_matrix.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+# Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang)
+
+import argparse
+import json
+
+
+def get_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--enable-cuda",
+ action="store_true",
+ default=False,
+ help="True to enable CUDA",
+ )
+
+ parser.add_argument(
+ "--for-windows",
+ action="store_true",
+ default=False,
+ help="True for windows",
+ )
+
+ parser.add_argument(
+ "--test-only-latest-torch",
+ action="store_true",
+ default=False,
+ help="""If True, we test only the latest PyTroch
+ to reduce CI running time.""",
+ )
+ return parser.parse_args()
+
+
+def generate_build_matrix(enable_cuda, for_windows, test_only_latest_torch):
+ matrix = {
+ # 1.5.x is removed because there are compilation errors.
+ # See
+ # https://github.com/csukuangfj/k2/runs/2533830771?check_suite_focus=true
+ # and
+ # https://github.com/NVIDIA/apex/issues/805
+ # "1.5.0": {
+ # "python-version": ["3.6", "3.7", "3.8"],
+ # "cuda": ["10.1", "10.2"],
+ # },
+ # "1.5.1": {
+ # "python-version": ["3.6", "3.7", "3.8"],
+ # "cuda": ["10.1", "10.2"],
+ # },
+ "1.6.0": {
+ "python-version": ["3.6", "3.7", "3.8"],
+ "cuda": ["10.1", "10.2"]
+ if not for_windows
+ else ["10.1.243", "10.2.89"],
+ },
+ "1.7.0": {
+ "python-version": ["3.6", "3.7", "3.8"],
+ "cuda": ["10.1", "10.2", "11.0"]
+ if not for_windows
+ else ["10.1.243", "10.2.89", "11.0.3"],
+ },
+ "1.7.1": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.1", "10.2", "11.0"]
+ if not for_windows
+ else ["10.1.243", "10.2.89", "11.0.3"],
+ },
+ "1.8.0": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.1", "10.2", "11.1"]
+ if not for_windows
+ else ["10.1.243", "10.2.89", "11.1.1"],
+ },
+ "1.8.1": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.1", "10.2", "11.1"]
+ if not for_windows
+ else ["10.1.243", "10.2.89", "11.1.1"],
+ },
+ "1.9.0": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.2", "11.1"]
+ if not for_windows
+ else ["10.2.89", "11.1.1"],
+ },
+ "1.9.1": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.2", "11.1"]
+ if not for_windows
+ else ["10.2.89", "11.1.1"],
+ },
+ "1.10.0": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.2", "11.1", "11.3"]
+ if not for_windows
+ else ["10.2.89", "11.1.1", "11.3.1"],
+ },
+ "1.10.1": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.2", "11.1", "11.3"]
+ if not for_windows
+ else ["10.2.89", "11.1.1", "11.3.1"],
+ },
+ "1.10.2": {
+ "python-version": ["3.6", "3.7", "3.8", "3.9"],
+ "cuda": ["10.2", "11.1", "11.3"]
+ if not for_windows
+ else ["10.2.89", "11.1.1", "11.3.1"],
+ },
+ "1.11.0": {
+ "python-version": ["3.7", "3.8", "3.9", "3.10"],
+ "cuda": ["10.2", "11.3", "11.5"]
+ if not for_windows
+ else ["11.3.1", "11.5.2"],
+ },
+ }
+ if test_only_latest_torch:
+ latest = "1.11.0"
+ matrix = {latest: matrix[latest]}
+
+ ans = []
+ for torch, python_cuda in matrix.items():
+ python_versions = python_cuda["python-version"]
+ cuda_versions = python_cuda["cuda"]
+ if enable_cuda:
+ for p in python_versions:
+ for c in cuda_versions:
+ ans.append({"torch": torch, "python-version": p, "cuda": c})
+ else:
+ for p in python_versions:
+ ans.append({"torch": torch, "python-version": p})
+
+ print(json.dumps({"include": ans}))
+
+
+def main():
+ args = get_args()
+ generate_build_matrix(
+ enable_cuda=args.enable_cuda,
+ for_windows=args.for_windows,
+ test_only_latest_torch=args.test_only_latest_torch,
+ )
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/github_actions/install_cuda.sh b/scripts/github_actions/install_cuda.sh
index 7d023b9..b84de89 100755
--- a/scripts/github_actions/install_cuda.sh
+++ b/scripts/github_actions/install_cuda.sh
@@ -36,6 +36,13 @@ case "$cuda" in
# url=https://developer.download.nvidia.com/compute/cuda/11.1.0/local_installers/cuda_11.1.0_455.23.05_linux.run
url=https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda_11.1.1_455.32.00_linux.run
;;
+ 11.3)
+ # url=https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda_11.3.0_465.19.01_linux.run
+ url=https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
+ ;;
+ 11.5)
+ url=https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda_11.5.2_495.29.05_linux.run
+ ;;
*)
echo "Unknown cuda version: $cuda"
exit 1
diff --git a/scripts/github_actions/install_cudnn.sh b/scripts/github_actions/install_cudnn.sh
index 853eba5..8feafbe 100755
--- a/scripts/github_actions/install_cudnn.sh
+++ b/scripts/github_actions/install_cudnn.sh
@@ -17,42 +17,43 @@
case $cuda in
10.0)
filename=cudnn-10.0-linux-x64-v7.6.5.32.tgz
- url=http://www.mediafire.com/file/1037lb1vmj9qdtq/cudnn-10.0-linux-x64-v7.6.5.32.tgz/file
;;
10.1)
filename=cudnn-10.1-linux-x64-v8.0.2.39.tgz
- url=http://www.mediafire.com/file/fnl2wg0h757qhd7/cudnn-10.1-linux-x64-v8.0.2.39.tgz/file
;;
10.2)
filename=cudnn-10.2-linux-x64-v8.0.2.39.tgz
- url=http://www.mediafire.com/file/sc2nvbtyg0f7ien/cudnn-10.2-linux-x64-v8.0.2.39.tgz/file
;;
11.0)
filename=cudnn-11.0-linux-x64-v8.0.5.39.tgz
- url=https://www.mediafire.com/file/abyhnls106ko9kp/cudnn-11.0-linux-x64-v8.0.5.39.tgz/file
;;
11.1)
- filename=cudnn-11.1-linux-x64-v8.0.5.39.tgz
- url=https://www.mediafire.com/file/qx55zd65773xonv/cudnn-11.1-linux-x64-v8.0.5.39.tgz/file
+ filename=cudnn-11.1-linux-x64-v8.0.4.30.tgz
;;
+ 11.3)
+ filename=cudnn-11.3-linux-x64-v8.2.0.53.tgz
+ ;;
+ 11.5)
+ filename=cudnn-11.3-linux-x64-v8.2.0.53.tgz
+ ;;
+ # 11.5)
+ # filename=cudnn-linux-x86_64-8.3.2.44_cuda11.5-archive.tar.xz
+ # ;;
*)
echo "Unsupported cuda version: $cuda"
exit 1
;;
esac
-function retry() {
- $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
-}
+command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nPlease install 'git-lfs' first."; exit 2; }
-# It is forked from https://github.com/Juvenal-Yescas/mediafire-dl
-# https://github.com/Juvenal-Yescas/mediafire-dl/pull/2 changes the filename and breaks the CI.
-# We use a separate fork to keep the link fixed.
-retry wget https://raw.githubusercontent.com/csukuangfj/mediafire-dl/master/mediafire_dl.py
+git clone https://huggingface.co/csukuangfj/cudnn
+cd cudnn
+git lfs pull --include="$filename"
-sed -i 's/quiet=False/quiet=True/' mediafire_dl.py
-retry python3 mediafire_dl.py "$url"
-sudo tar xf ./$filename -C /usr/local
-rm -v ./$filename
+sudo tar xf ./$filename --strip-components=1 -C /usr/local/cuda
+
+# save disk space
+git lfs prune && cd .. && rm -rf cudnn
sudo sed -i '59i#define CUDNN_MAJOR 8' /usr/local/cuda/include/cudnn.h
diff --git a/scripts/github_actions/install_torch.sh b/scripts/github_actions/install_torch.sh
index 3ad1717..ed813c5 100755
--- a/scripts/github_actions/install_torch.sh
+++ b/scripts/github_actions/install_torch.sh
@@ -78,7 +78,7 @@ case ${torch} in
;;
esac
;;
- 1.9.0)
+ 1.9.*)
case ${cuda} in
10.2)
package="torch==${torch}"
@@ -91,6 +91,40 @@ case ${torch} in
;;
esac
;;
+ 1.10.*)
+ case ${cuda} in
+ 10.2)
+ package="torch==${torch}"
+ # Leave it empty to use PyPI.
+ url=
+ ;;
+ 11.1)
+ package="torch==${torch}+cu111"
+ url=https://download.pytorch.org/whl/torch_stable.html
+ ;;
+ 11.3)
+ package="torch==${torch}+cu113"
+ url=https://download.pytorch.org/whl/torch_stable.html
+ ;;
+ esac
+ ;;
+ 1.11.*)
+ case ${cuda} in
+ 10.2)
+ package="torch==${torch}"
+ # Leave it empty to use PyPI.
+ url=
+ ;;
+ 11.3)
+ package="torch==${torch}+cu113"
+ url=https://download.pytorch.org/whl/torch_stable.html
+ ;;
+ 11.5)
+ package="torch==${torch}+cu115"
+ url=https://download.pytorch.org/whl/torch_stable.html
+ ;;
+ esac
+ ;;
*)
echo "Unsupported PyTorch version: ${torch}"
exit 1
diff --git a/scripts/github_actions/run-nightly-build.py b/scripts/github_actions/run-nightly-build.py
new file mode 100755
index 0000000..1e002fb
--- /dev/null
+++ b/scripts/github_actions/run-nightly-build.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+# Copyright 2022 Xiaomi Corp. (authors: Fangjun Kuang)
+
+import subprocess
+from datetime import datetime, timedelta
+
+
+def get_last_commit_date() -> datetime:
+ date = (
+ subprocess.check_output(
+ [
+ "git",
+ "log",
+ "-1",
+ "--format=%ad",
+ "--date=unix",
+ ]
+ )
+ .decode("ascii")
+ .strip()
+ )
+ return datetime.utcfromtimestamp(int(date))
+
+
+def main():
+ last_commit_date_utc = get_last_commit_date()
+ now_utc = datetime.utcnow()
+ if last_commit_date_utc + timedelta(days=1) > now_utc:
+ print("true")
+ else:
+ print("false")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/setup.py b/setup.py
index 3436265..d69fb87 100644
--- a/setup.py
+++ b/setup.py
@@ -61,5 +61,7 @@ with open("kaldifeat/python/kaldifeat/__init__.py", "r") as f:
with open("kaldifeat/python/kaldifeat/__init__.py", "w") as f:
for line in lines:
- if "__version__" not in line:
- f.write(line)
+ if "__version__" in line and "torch" not in line:
+ # skip __version__ = "x.x.x"
+ continue
+ f.write(line)