diff --git a/.github/workflows/build-doc.yml b/.github/workflows/build-doc.yml
new file mode 100644
index 0000000..869b5c4
--- /dev/null
+++ b/.github/workflows/build-doc.yml
@@ -0,0 +1,62 @@
+# Copyright 2022 Xiaomi Corp. (author: Fangjun Kuang)
+
+# See ../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# refer to https://github.com/actions/starter-workflows/pull/47/files
+
+# You can access it at https://csukuangfj.github.io/kaldifeat
+name: Generate doc
+on:  # triggered by every push to one of the branches listed below
+  push:
+    branches:
+      - master
+      - doc
+
+jobs:
+  build-doc:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.8"]  # quoted: a bare 3.10 would be read as the float 3.1
+    steps:
+      # Check out the repository; see https://github.com/actions/checkout
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0  # fetch the full history rather than a shallow clone
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Display Python version
+        run: python -c "import sys; print(sys.version)"
+
+      - name: Build doc  # installs doc/requirements.txt, then runs `make html` in doc/
+        shell: bash
+        run: |
+          cd doc
+          python3 -m pip install -r ./requirements.txt
+          make html
+          touch build/html/.nojekyll
+
+      - name: Deploy  # publishes ./doc/build/html to the gh-pages branch
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./doc/build/html
+          publish_branch: gh-pages
diff --git a/README.md b/README.md
index 9513b14..ab6c794 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,10 @@
+[](https://csukuangfj.github.io/kaldifeat/)
+
+**Documentation**: <https://csukuangfj.github.io/kaldifeat>
+
@@ -277,98 +281,6 @@ See
+for installation.
diff --git a/doc/source/_static/.gitkeep b/doc/source/_static/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/doc/source/code/test_fbank.py b/doc/source/code/test_fbank.py
deleted file mode 100755
index 0f39a1c..0000000
--- a/doc/source/code/test_fbank.py
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang)
-
-import numpy as np
-import soundfile as sf
-import torch
-
-import kaldifeat
-
-
-def read_wave(filename) -> torch.Tensor:
- """Read a wave file and return it as a 1-D tensor.
-
- Note:
- You don't need to scale it to [-32768, 32767].
- We use scaling here to follow the approach in Kaldi.
-
- Args:
- filename:
- Filename of a sound file.
- Returns:
- Return a 1-D tensor containing audio samples.
- """
- with sf.SoundFile(filename) as sf_desc:
- sampling_rate = sf_desc.samplerate
- assert sampling_rate == 16000
- data = sf_desc.read(dtype=np.float32, always_2d=False)
- data *= 32768
- return torch.from_numpy(data)
-
-
-def test_fbank():
- device = torch.device("cpu")
- if torch.cuda.is_available():
- device = torch.device("cuda", 0)
-
- wave0 = read_wave("test_data/test.wav")
- wave1 = read_wave("test_data/test2.wav")
-
- wave0 = wave0.to(device)
- wave1 = wave1.to(device)
-
- opts = kaldifeat.FbankOptions()
- opts.frame_opts.dither = 0
- opts.device = device
-
- fbank = kaldifeat.Fbank(opts)
-
- # We can compute fbank features in batches
- features = fbank([wave0, wave1])
- assert isinstance(features, list), f"{type(features)}"
- assert len(features) == 2
-
- # We can also compute fbank features for a single wave
- features0 = fbank(wave0)
- features1 = fbank(wave1)
-
- assert torch.allclose(features[0], features0)
- assert torch.allclose(features[1], features1)
-
- # To compute fbank features for only a specified frame
- audio_frames = fbank.convert_samples_to_frames(wave0)
- feature_frame_1 = fbank.compute(audio_frames[1])
- feature_frame_10 = fbank.compute(audio_frames[10])
-
- assert torch.allclose(features0[1], feature_frame_1)
- assert torch.allclose(features0[10], feature_frame_10)
-
-
-if __name__ == "__main__":
- test_fbank()
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 2ec9ca6..fef6d6f 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -59,7 +59,7 @@ templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
+exclude_patterns = ["images/*.md"]
source_suffix = {
".rst": "restructuredtext",
@@ -102,3 +102,35 @@ html_theme_options = {
"prev_next_buttons_location": "bottom",
"style_external_links": True,
}
+
+rst_epilog = """
+.. _kaldifeat: https://github.com/csukuangfj/kaldifeat
+.. _Kaldi: https://github.com/kaldi-asr/kaldi
+.. _PyTorch: https://pytorch.org/
+.. _kaldifeat.Fbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L10
+.. _kaldifeat.Mfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L10
+.. _kaldifeat.Plp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L10
+.. _kaldifeat.Spectrogram: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/spectrogram.py#L9
+.. _kaldifeat.OnlineFbank: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/fbank.py#L16
+.. _kaldifeat.OnlineMfcc: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/mfcc.py#L16
+.. _kaldifeat.OnlinePlp: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/kaldifeat/plp.py#L16
+.. _compute-fbank-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-fbank-feats.cc
+.. _compute-mfcc-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-mfcc-feats.cc
+.. _compute-plp-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-plp-feats.cc
+.. _compute-spectrogram-feats: https://github.com/kaldi-asr/kaldi/blob/master/src/featbin/compute-spectrogram-feats.cc
+.. _kaldi::OnlineFbank: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L160
+.. _kaldi::OnlineMfcc: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L158
+.. _kaldi::OnlinePlp: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/online-feature.h#L159
+.. _kaldifeat.FbankOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-fbank.h#L19
+.. _kaldi::FbankOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-fbank.h#L41
+.. _kaldifeat.MfccOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-mfcc.h#L22
+.. _kaldi::MfccOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-mfcc.h#L38
+.. _kaldifeat.PlpOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-plp.h#L24
+.. _kaldi::PlpOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-plp.h#L42
+.. _kaldifeat.SpectrogramOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-spectrogram.h#L18
+.. _kaldi::SpectrogramOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-spectrogram.h#L38
+.. _kaldifeat.FrameExtractionOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/feature-window.h#L30
+.. _kaldi::FrameExtractionOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.h#L35
+.. _kaldifeat.MelBanksOptions: https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/csrc/mel-computations.h#L17
+.. _kaldi::MelBanksOptions: https://github.com/kaldi-asr/kaldi/blob/master/src/feat/mel-computations.h#L43
+"""
diff --git a/doc/source/index.rst b/doc/source/index.rst
index 436e1b8..caa50b5 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -6,19 +6,11 @@
kaldifeat
=========
-`kaldifeat `_ implements
-feature extraction algorithms **compatible** with kaldi using PyTorch, supporting CUDA
-as well as autograd.
-
-Currently, only fbank features are supported.
-It can produce the same feature output as ``compute-fbank-feats`` (from kaldi)
-when given the same options.
-
-
.. toctree::
:maxdepth: 2
- :caption: Contents:
+ :caption: Contents
+ intro
installation
- usage
+ usage/index
diff --git a/doc/source/installation.rst b/doc/source/installation.rst
index 9e4bfc8..5baa217 100644
--- a/doc/source/installation.rst
+++ b/doc/source/installation.rst
@@ -1,19 +1,38 @@
Installation
============
+ - |os_types|
+ - |python_versions|
+ - |pytorch_versions|
+ - |cuda_versions|
+
+.. caution::
+
+ `kaldifeat`_ depends on `PyTorch`_. `PyTorch`_ >= 1.5.0 is known to work.
+
+ Please first install `PyTorch`_ before you install `kaldifeat`_.
+
+.. hint::
+
+ To install a CPU version of `kaldifeat`_, please install a CPU version
+ of `PyTorch`_.
+
+ To install a CUDA version of `kaldifeat`_, please install a CUDA version
+ of `PyTorch`_. CUDA >= 10.1 is known to work.
+
.. _from source:
Install kaldifeat from source
-----------------------------
-You have to install ``cmake`` and ``PyTorch`` first.
+You have to install ``cmake`` and `PyTorch`_ first.
- ``cmake`` 3.11 is known to work. Other CMake versions may also work.
- - ``PyTorch`` 1.8.1 is known to work. Other PyTorch versions may also work.
+ - `PyTorch`_ >= 1.5.0 is known to work. Other PyTorch versions may also work.
- Python >= 3.6
-The commands to install ``kaldifeat`` from source are:
+The commands to install `kaldifeat`_ from source are:
.. code-block:: bash
@@ -21,7 +40,7 @@ The commands to install ``kaldifeat`` from source are:
cd kaldifeat
python3 setup.py install
-To test that you have installed ``kaldifeat`` successfully, please run:
+To test that you have installed `kaldifeat`_ successfully, please run:
.. code-block:: bash
@@ -29,26 +48,120 @@ To test that you have installed ``kaldifeat`` successfully, please run:
It should print the version, e.g., ``1.0``.
+.. _from PyPI:
+
Install kaldifeat from PyPI
---------------------------
-The pre-built ``kaldifeat`` hosted on PyPI uses PyTorch 1.8.1.
-If you install ``kaldifeat`` using pip, it will replace your locally
-installed PyTorch automatically with PyTorch 1.8.1.
-
-If you don't want this happen, please `Install kaldifeat from source`_.
-
-The command to install ``kaldifeat`` from PyPI is:
+The command to install `kaldifeat`_ from PyPI is:
.. code-block:: bash
- pip install kaldifeat
+ pip install --verbose kaldifeat
-
-To test that you have installed ``kaldifeat`` successfully, please run:
+To test that you have installed `kaldifeat`_ successfully, please run:
.. code-block:: bash
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
It should print the version, e.g., ``1.0``.
+
+Install kaldifeat from conda (Only for Linux)
+---------------------------------------------
+
+.. hint::
+
+ Installation using ``conda`` supports only Linux. For macOS and Windows,
+ please use either :ref:`from source` or :ref:`from PyPI`.
+
+The command to install `kaldifeat`_ using ``conda`` is:
+
+.. code-block:: bash
+
+ conda install -c kaldifeat -c pytorch -c conda-forge kaldifeat python=3.8 cudatoolkit=11.1 pytorch=1.8.1
+
+You can select the supported Python version, CUDA toolkit version and `PyTorch`_ version as you wish.
+
+To install a CPU version of `kaldifeat`_, use:
+
+.. code-block:: bash
+
+ conda install -c kaldifeat -c pytorch cpuonly kaldifeat python=3.8 pytorch=1.8.1
+
+.. caution::
+
+ If you encounter issues about missing GLIBC after installing `kaldifeat`_
+ with ``conda``, please consider :ref:`from source` or :ref:`from PyPI`.
+ The reason is that the package was built using Ubuntu 18.04 and your system's
+ GLIBC is older.
+
+
+.. |os_types| image:: ./images/os-green.svg
+ :alt: Supported operating systems
+
+.. |python_versions| image:: ./images/python_ge_3.6-blue.svg
+ :alt: Supported python versions
+
+.. |cuda_versions| image:: ./images/cuda_ge_10.1-orange.svg
+ :alt: Supported cuda versions
+
+.. |pytorch_versions| image:: ./images/pytorch_ge_1.5.0-green.svg
+ :alt: Supported pytorch versions
+
+To test that you have installed `kaldifeat`_ successfully, please run:
+
+.. code-block:: bash
+
+ python3 -c "import kaldifeat; print(kaldifeat.__version__)"
+
+It should print the version, e.g., ``1.0``.
+
+FAQs
+----
+
+How to install a CUDA version of kaldifeat
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You need to first install a CUDA version of `PyTorch`_ and then install `kaldifeat`_.
+
+.. note::
+
+ You can use a CUDA version of `kaldifeat`_ on machines with no GPUs.
+
+How to install a CPU version of kaldifeat
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You need to first install a CPU version of `PyTorch`_ and then install `kaldifeat`_.
+
+How to fix ``Caffe2: Cannot find cuDNN library``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block::
+
+ Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN
+ libraries. Please set the proper cuDNN prefixes and / or install cuDNN.
+
+You will have such an error when you want to install a CUDA version of `kaldifeat`_
+by ``pip install kaldifeat`` or from source.
+
+You need to first install cuDNN. Assume you have installed cuDNN to the
+path ``/path/to/cudnn``. You can fix the error by using **one** of the following
+commands.
+
+(1) Fix for installation using ``pip install``
+
+.. code-block:: bash
+
+ export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
+ pip install --verbose kaldifeat
+
+(2) Fix for installation from source
+
+.. code-block:: bash
+
+ mkdir -p /some/path && cd /some/path
+ git clone https://github.com/csukuangfj/kaldifeat.git
+ cd kaldifeat
+ export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include"
+ python setup.py install
diff --git a/doc/source/intro.rst b/doc/source/intro.rst
new file mode 100644
index 0000000..6e66c36
--- /dev/null
+++ b/doc/source/intro.rst
@@ -0,0 +1,103 @@
+Introduction
+============
+
+`kaldifeat`_ implements
+speech feature extraction algorithms **compatible** with `Kaldi`_ using `PyTorch`_,
+supporting CUDA as well as autograd.
+
+`kaldifeat`_ has the following features:
+
+ - Fully compatible with `Kaldi`_
+
+ .. note::
+
+ The underlying C++ code is copied & modified from `Kaldi`_ directly.
+ It is rewritten with `PyTorch`_ C++ APIs.
+
+ - Provide not only ``C++ APIs`` but also ``Python APIs``
+
+ .. note::
+
+ You can access `kaldifeat`_ from ``Python``.
+
+ - Support autograd
+ - Support ``CUDA`` and ``CPU``
+
+ .. note::
+
+ You can use CUDA for feature extraction.
+
+ - Support ``online`` (i.e., ``streaming``) and ``offline`` (i.e., ``non-streaming``)
+ feature extraction
+ - Support chunk-based processing
+
+ .. note::
+
+ This is especially useful if you want to process audio that is several
+ hours long, which may cause OOM if you send it for computation all at once.
+ With chunk-based processing, you can process audio of arbitrary length.
+
+ - Support batch processing
+
+ .. note::
+
+ With `kaldifeat`_ you can extract features for a batch of audio files.
+
+
+.. see https://sublime-and-sphinx-guide.readthedocs.io/en/latest/tables.html
+
+Currently implemented speech features and their counterparts in `Kaldi`_ are
+listed in the following table.
+
+.. list-table:: Supported speech features
+ :widths: 50 50
+ :header-rows: 1
+
+ * - Supported speech features
+ - Counterpart in `Kaldi`_
+ * - `kaldifeat.Fbank`_
+ - `compute-fbank-feats`_
+ * - `kaldifeat.Mfcc`_
+ - `compute-mfcc-feats`_
+ * - `kaldifeat.Plp`_
+ - `compute-plp-feats`_
+ * - `kaldifeat.Spectrogram`_
+ - `compute-spectrogram-feats`_
+ * - `kaldifeat.OnlineFbank`_
+ - `kaldi::OnlineFbank`_
+ * - `kaldifeat.OnlineMfcc`_
+ - `kaldi::OnlineMfcc`_
+ * - `kaldifeat.OnlinePlp`_
+ - `kaldi::OnlinePlp`_
+
+Each feature computer needs an option. The following table lists the options
+for each computer and the corresponding options in `Kaldi`_.
+
+.. hint::
+
+ Note that we reuse the parameter names from `Kaldi`_.
+
+ Also, both online feature computers and offline feature computers share the
+ same option.
+
+.. list-table:: Feature computer options
+ :widths: 50 50
+ :header-rows: 1
+
+ * - Options in `kaldifeat`_
+ - Corresponding options in `Kaldi`_
+ * - `kaldifeat.FbankOptions`_
+ - `kaldi::FbankOptions`_
+ * - `kaldifeat.MfccOptions`_
+ - `kaldi::MfccOptions`_
+ * - `kaldifeat.PlpOptions`_
+ - `kaldi::PlpOptions`_
+ * - `kaldifeat.SpectrogramOptions`_
+ - `kaldi::SpectrogramOptions`_
+ * - `kaldifeat.FrameExtractionOptions`_
+ - `kaldi::FrameExtractionOptions`_
+ * - `kaldifeat.MelBanksOptions`_
+ - `kaldi::MelBanksOptions`_
+
+Read more to learn how to install `kaldifeat`_ and how to use each feature
+computer.
diff --git a/doc/source/usage.rst b/doc/source/usage.rst
deleted file mode 100644
index dd6a770..0000000
--- a/doc/source/usage.rst
+++ /dev/null
@@ -1,212 +0,0 @@
-Usage
-=====
-
-Let us first see the help message of kaldi's ``compute-fbank-feats``:
-
-.. code-block:: bash
-
- $ compute-fbank-feats
-
- Create Mel-filter bank (FBANK) feature files.
- Usage: compute-fbank-feats [options...]
-
- Options:
- --allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
- --allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
- --blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
- --channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
- --debug-mel : Print out debugging information for mel bin computation (bool, default = false)
- --dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
- --energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
- --frame-length : Frame length in milliseconds (float, default = 25)
- --frame-shift : Frame shift in milliseconds (float, default = 10)
- --high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
- --htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
- --low-freq : Low cutoff frequency for mel bins (float, default = 20)
- --max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
- --min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
- --num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
- --output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
- --preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
- --raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
- --remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
- --round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
- --sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
- --snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
- --subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
- --use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
- --use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
- --use-power : If true, use power, else use magnitude. (bool, default = true)
- --utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
- --vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
- --vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
- --vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
- --vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
- --window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
- --write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
-
- Standard options:
- --config : Configuration file to read (this option may be repeated) (string, default = "")
- --help : Print out usage message (bool, default = false)
- --print-args : Print the command line arguments (to stderr) (bool, default = true)
- --verbose : Verbose level (higher->more logging) (int, default = 0)
-
-FbankOptions
-------------
-
-``kaldifeat`` reuses the same options from kaldi's ``compute-fbank-feats``.
-
-The following shows the default values of ``kaldifeat.FbankOptions``:
-
-.. code-block:: python
-
- >>> import kaldifeat
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts)
- frame_opts:
- samp_freq: 16000
- frame_shift_ms: 10
- frame_length_ms: 25
- dither: 1
- preemph_coeff: 0.97
- remove_dc_offset: 1
- window_type: povey
- round_to_power_of_two: 1
- blackman_coeff: 0.42
- snip_edges: 1
-
-
- mel_opts:
- num_bins: 23
- low_freq: 20
- high_freq: 0
- vtln_low: 100
- vtln_high: -500
- debug_mel: 0
- htk_mode: 0
-
- use_energy: 0
- energy_floor: 0
- raw_energy: 1
- htk_compat: 0
- use_log_fbank: 1
- use_power: 1
- device: cpu
-
-It consists of three parts:
-
- - ``frame_opts``
-
- Options in this part are accessed by ``frame_opts.xxx``. That is, to access
- the sample rate, you use:
-
- .. code-block:: python
-
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts.frame_opts.samp_freq)
- 16000.0
-
- - ``mel_opts``
-
- Options in this part are accessed by ``mel_opts.xxx``. That is, to access
- the number of mel bins, you use:
-
- .. code-block:: python
-
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts.mel_opts.num_bins)
- 23
-
- - fbank related
-
- Options in this part are accessed directly. That is, to access the device
- field, you use:
-
- .. code-block::
-
- >>> print(fbank_opts.device)
- cpu
- >>> fbank_opts.device = 'cuda:0'
- >>> print(fbank_opts.device)
- cuda:0
- >>> import torch
- >>> fbank_opts.device = torch.device('cuda', 0)
- >>> print(fbank_opts.device)
- cuda:0
-
-
-
-To change the sample rate to 8000, you can use:
-
-.. code-block:: python
-
- >>> fbank_opts = kaldifeat.FbankOptions()
- >>> print(fbank_opts.frame_opts.samp_freq)
- 16000.0
- >>> fbank_opts.frame_opts.samp_freq = 8000
- >>> print(fbank_opts.frame_opts.samp_freq)
- 8000.0
-
-To change ``snip_edges`` to ``False``, you can use:
-
-.. code-block:: python
-
- >>> fbank_opts.frame_opts.snip_edges = False
- >>> print(fbank_opts.frame_opts.snip_edges)
- False
-
-To change number of mel bins to 80, you can use:
-
-.. code-block:: python
-
- >>> print(fbank_opts.mel_opts.num_bins)
- 23
- >>> fbank_opts.mel_opts.num_bins = 80
- >>> print(fbank_opts.mel_opts.num_bins)
- 80
-
-To change the device to ``cuda``, you can use:
-
-
-Fbank
------
-
-The following shows how to use ``kaldifeat.Fbank`` to compute
-the fbank features of sound files.
-
-First, let us generate two sound files using ``sox``:
-
-.. code-block:: bash
-
- # generate a wav of two seconds, containing a sine-wave
- # swept from 300 Hz to 3300 Hz
- sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
-
- # another sound file with 0.5 seconds
- sox -n -r 16000 -b 16 test2.wav synth 0.5 sine 300-3300
-
-.. hint::
-
- You can find the above two files by visiting the following two links:
-
- - `test.wav `_
- - `test2.wav `_
-
-The `following code `_
-shows the usage of ``kaldifeat.Fbank``.
-
-It shows:
-
- - How to read a sound file. Note that audio samples are scaled to the range [-32768, 32768].
- The intention is to produce the same output as kaldi. You don't need to scale it if
- you don't care about the compatibility with kaldi
-
- - ``kaldifeat.Fbank`` supports CUDA as well as CPU
-
- - ``kaldifeat.Fbank`` supports processing sound file in a batch as well as accepting
- a single sound file
-
-
-.. literalinclude:: ./code/test_fbank.py
- :caption: Demo of ``kaldifeat.Fbank``
- :language: python
diff --git a/doc/source/usage/code/compute-fbank-feats-help.txt b/doc/source/usage/code/compute-fbank-feats-help.txt
new file mode 100644
index 0000000..3922636
--- /dev/null
+++ b/doc/source/usage/code/compute-fbank-feats-help.txt
@@ -0,0 +1,46 @@
+compute-fbank-feats
+
+Create Mel-filter bank (FBANK) feature files.
+Usage: compute-fbank-feats [options...]
+
+Options:
+ --allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false)
+ --allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false)
+ --blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42)
+ --channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1)
+ --debug-mel : Print out debugging information for mel bin computation (bool, default = false)
+ --dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1)
+ --energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0)
+ --frame-length : Frame length in milliseconds (float, default = 25)
+ --frame-shift : Frame shift in milliseconds (float, default = 10)
+ --high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0)
+ --htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false)
+ --low-freq : Low cutoff frequency for mel bins (float, default = 20)
+ --max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1)
+ --min-duration : Minimum duration of segments to process (in seconds). (float, default = 0)
+ --num-mel-bins : Number of triangular mel-frequency bins (int, default = 23)
+ --output-format : Format of the output files [kaldi, htk] (string, default = "kaldi")
+ --preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97)
+ --raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true)
+ --remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true)
+ --round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true)
+ --sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000)
+ --snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true)
+ --subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false)
+ --use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false)
+ --use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true)
+ --use-power : If true, use power, else use magnitude. (bool, default = true)
+ --utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "")
+ --vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500)
+ --vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100)
+ --vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "")
+ --vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1)
+ --window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey")
+ --write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "")
+
+Standard options:
+ --config : Configuration file to read (this option may be repeated) (string, default = "")
+ --help : Print out usage message (bool, default = false)
+ --print-args : Print the command line arguments (to stderr) (bool, default = true)
+ --verbose : Verbose level (higher->more logging) (int, default = 0)
+
diff --git a/doc/source/usage/code/fbank_options-1.txt b/doc/source/usage/code/fbank_options-1.txt
new file mode 100644
index 0000000..7e0470a
--- /dev/null
+++ b/doc/source/usage/code/fbank_options-1.txt
@@ -0,0 +1,65 @@
+$ python3
+Python 3.8.0 (default, Oct 28 2019, 16:14:01)
+[GCC 8.3.0] on linux
+Type "help", "copyright", "credits" or "license" for more information.
+>>> import kaldifeat
+>>> opts = kaldifeat.FbankOptions()
+>>> print(opts)
+frame_opts:
+samp_freq: 16000
+frame_shift_ms: 10
+frame_length_ms: 25
+dither: 1
+preemph_coeff: 0.97
+remove_dc_offset: 1
+window_type: povey
+round_to_power_of_two: 1
+blackman_coeff: 0.42
+snip_edges: 1
+max_feature_vectors: -1
+
+
+mel_opts:
+num_bins: 23
+low_freq: 20
+high_freq: 0
+vtln_low: 100
+vtln_high: -500
+debug_mel: 0
+htk_mode: 0
+
+use_energy: 0
+energy_floor: 0
+raw_energy: 1
+htk_compat: 0
+use_log_fbank: 1
+use_power: 1
+device: cpu
+
+>>> print(opts.dither)
+Traceback (most recent call last):
+  File "<stdin>", line 1, in <module>
+AttributeError: '_kaldifeat.FbankOptions' object has no attribute 'dither'
+>>>
+>>> print(opts.frame_opts.dither)
+1.0
+>>> opts.frame_opts.dither = 0 # disable dither
+>>> print(opts.frame_opts.dither)
+0.0
+>>> import torch
+>>> print(opts.device)
+cpu
+>>> opts.device = 'cuda:0'
+>>> print(opts.device)
+cuda:0
+>>> opts.device = torch.device('cuda', 1)
+>>> print(opts.device)
+cuda:1
+>>> opts.device = 'cpu'
+>>> print(opts.device)
+cpu
+>>> print(opts.mel_opts.num_bins)
+23
+>>> opts.mel_opts.num_bins = 80
+>>> print(opts.mel_opts.num_bins)
+80
diff --git a/doc/source/usage/code/test_fbank_options.py b/doc/source/usage/code/test_fbank_options.py
new file mode 120000
index 0000000..3bfe0fa
--- /dev/null
+++ b/doc/source/usage/code/test_fbank_options.py
@@ -0,0 +1 @@
+../../../../kaldifeat/python/tests/test_fbank_options.py
\ No newline at end of file
diff --git a/doc/source/usage/fbank.rst b/doc/source/usage/fbank.rst
new file mode 100644
index 0000000..e3f1351
--- /dev/null
+++ b/doc/source/usage/fbank.rst
@@ -0,0 +1,3 @@
+kaldifeat.Fbank
+===============
+
diff --git a/doc/source/usage/fbank_options.rst b/doc/source/usage/fbank_options.rst
new file mode 100644
index 0000000..d9adc2d
--- /dev/null
+++ b/doc/source/usage/fbank_options.rst
@@ -0,0 +1,52 @@
+kaldifeat.FbankOptions
+======================
+
+If you want to construct an instance of `kaldifeat.Fbank`_ or
+`kaldifeat.OnlineFbank`_, you have to provide an instance of
+`kaldifeat.FbankOptions`_.
+
+The following code shows how to construct an instance of `kaldifeat.FbankOptions`_.
+
+.. literalinclude:: ./code/fbank_options-1.txt
+ :caption: Usage of `kaldifeat.FbankOptions`_
+ :emphasize-lines: 6,8,22,37
+ :language: python
+
+Note that we reuse the same option names as `compute-fbank-feats`_ from `Kaldi`_:
+
+.. code-block:: bash
+
+ $ compute-fbank-feats --help
+
+
+.. literalinclude:: ./code/compute-fbank-feats-help.txt
+ :caption: Output of ``compute-fbank-feats --help``
+
+Please refer to the output of ``compute-fbank-feats --help`` for the meaning
+of each field of `kaldifeat.FbankOptions`_.
+
+One thing worth noting is that `kaldifeat.FbankOptions`_ has a field ``device``,
+which is an instance of ``torch.device``. You can assign it either a string, e.g.,
+``"cpu"`` or ``"cuda:0"``, or an instance of ``torch.device``, e.g., ``torch.device("cpu")`` or
+``torch.device("cuda", 1)``.
+
+.. hint::
+
+ You can use this field to control whether the feature computer
+ constructed from it performs computation on CPU or CUDA.
+
+.. caution::
+
+ If you use a CUDA device, make sure that you have installed a CUDA version
+ of `PyTorch`_.
+
+Example usage
+-------------
+
+The following code from
+`<https://github.com/csukuangfj/kaldifeat/blob/master/kaldifeat/python/tests/test_fbank_options.py>`_
+demonstrates the usage of `kaldifeat.FbankOptions`_:
+
+.. literalinclude:: ./code/test_fbank_options.py
+ :caption: Example usage of `kaldifeat.FbankOptions`_
+ :language: python
diff --git a/doc/source/usage/index.rst b/doc/source/usage/index.rst
new file mode 100644
index 0000000..f40dcd5
--- /dev/null
+++ b/doc/source/usage/index.rst
@@ -0,0 +1,11 @@
+Usage
+=====
+
+This section describes how to use feature computers in `kaldifeat`_.
+
+.. toctree::
+ :maxdepth: 2
+
+ fbank_options
+ fbank
+ online_fbank
diff --git a/doc/source/usage/online_fbank.rst b/doc/source/usage/online_fbank.rst
new file mode 100644
index 0000000..557104d
--- /dev/null
+++ b/doc/source/usage/online_fbank.rst
@@ -0,0 +1,3 @@
+kaldifeat.OnlineFbank
+=====================
+