diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index 74c255a..3faf80e 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -1,3 +1,19 @@ +# Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) + +# See ../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# name: Publish to PyPI on: @@ -7,30 +23,85 @@ on: jobs: pypi: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-18.04] + cuda: ["10.1"] + gcc: ["5"] + torch: ["1.8.1"] + python-version: [3.6, 3.7, 3.8] steps: - uses: actions/checkout@v2 + with: + fetch-depth: 0 - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.6 + python-version: ${{ matrix.python-version }} - - name: Install Python dependencies + - name: Install CUDA Toolkit ${{ matrix.cuda }} + shell: bash + env: + cuda: ${{ matrix.cuda }} + run: | + source ./scripts/github_actions/install_cuda.sh + echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV + echo "${CUDA_HOME}/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + + - name: Display NVCC version + run: | + which nvcc + nvcc --version + + - name: Install GCC ${{ matrix.gcc }} + run: | + sudo apt-get install -y gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} + echo "CC=/usr/bin/gcc-${{ matrix.gcc }}" >> $GITHUB_ENV + echo "CXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV + echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}" >> $GITHUB_ENV + + - name: Install PyTorch ${{ matrix.torch }} + env: + cuda: ${{ matrix.cuda }} + torch: ${{ matrix.torch }} shell: bash run: | python3 -m pip install --upgrade pip - python3 -m pip install wheel twine setuptools + python3 -m pip install wheel twine typing_extensions + python3 -m pip install bs4 requests tqdm - - name: Build - shell: bash + ./scripts/github_actions/install_torch.sh + python3 -c "import torch; print('torch version:', torch.__version__)" + + - name: Download cudnn 8.0 + env: + cuda: ${{ matrix.cuda }} run: | - python3 setup.py sdist - ls -l dist/* + ./scripts/github_actions/install_cudnn.sh + + - name: Build pip packages + shell: bash + env: + KALDIFEAT_IS_FOR_PYPI: 1 + run: | + tag=$(python3 -c "import sys; print(''.join(sys.version[:3].split('.')))") + export KALDIFEAT_MAKE_ARGS="-j2" + python3 setup.py bdist_wheel --python-tag=py${tag} + ls -lh dist/ - name: Publish wheels to PyPI env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - twine upload dist/kaldifeat-*.tar.gz + twine upload dist/kaldifeat-*.whl + + - name: Upload Wheel + uses: actions/upload-artifact@v2 + with: + name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-${{ matrix.os }} + path: dist/*.whl diff --git a/README.md b/README.md index 744bdf9..d675805 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,36 @@ Wrap kaldi's feature computations to Python with PyTorch support. # Installation -`kaldifeat` can be installed by +## From PyPi with pip + +If you install `kaldifeat` using `pip`, it will also install +PyTorch 1.8.1. If this is not what you want, please install `kaldifeat` +from source (see below). ```bash pip install kaldifeat ``` -# TODOs +## From source -- [ ] Add Python interface -- [ ] Support torch.device so that it can switch between CUDA and CPU -- [ ] Add unit tests -- [ ] Set up GitHub actions -- [ ] Benchmark its speed and compare it with Kaldi -- [ ] Support batch processing of multiple waves -- [ ] Handle non-default parameters -- [ ] Support MFCC and other features available in Kaldi -- [ ] Publish it to PyPI +The following are the commands to compile `kaldifeat` from source. +We assume that you have installed `cmake` and PyTorch. +cmake 3.11 is known to work. Other cmake versions may also work. +PyTorch 1.8.1 is known to work. Other PyTorch versions may also work. + +```bash +mkdir /some/path +git clone https://github.com/csukuangfj/kaldifeat.git +cd kaldifeat +python setup.py install +``` + +To test whether `kaldifeat` was installed successfully, you can run: +``` +python3 -c "import kaldifeat; print(kaldifeat.__version__)" +``` + +## Usage + +Please refer to +for how to use `kaldifeat`. diff --git a/cmake/cmake_extension.py b/cmake/cmake_extension.py index 9f2d879..413a87d 100644 --- a/cmake/cmake_extension.py +++ b/cmake/cmake_extension.py @@ -9,6 +9,12 @@ from pathlib import Path import setuptools from setuptools.command.build_ext import build_ext + +def is_for_pypi(): + ans = os.environ.get("KALDIFEAT_IS_FOR_PYPI", None) + return ans is not None + + try: from wheel.bdist_wheel import bdist_wheel as _bdist_wheel @@ -17,11 +23,12 @@ try: _bdist_wheel.finalize_options(self) # In this case, the generated wheel has a name in the form # k2-xxx-pyxx-none-any.whl - # self.root_is_pure = True - - # The generated wheel has a name ending with - # -linux_x86_64.whl - self.root_is_pure = False + if is_for_pypi(): + self.root_is_pure = True + else: + # The generated wheel has a name ending with + # -linux_x86_64.whl + self.root_is_pure = False except ImportError: diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..6247f7e --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/requirements.txt b/doc/requirements.txt new file mode 100644 index 0000000..9d9bf4b --- /dev/null +++ b/doc/requirements.txt @@ -0,0 +1,6 @@ +dataclasses +recommonmark +sphinx +sphinx-autodoc-typehints +sphinx_rtd_theme +sphinxcontrib-bibtex diff --git a/doc/source/code/test_fbank.py b/doc/source/code/test_fbank.py new file mode 100755 index 0000000..0f39a1c --- /dev/null +++ b/doc/source/code/test_fbank.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +# Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang) + +import numpy as np +import soundfile as sf +import torch + +import kaldifeat + + +def read_wave(filename) -> torch.Tensor: + """Read a wave file and return it as a 1-D tensor. + + Note: + You don't need to scale it to [-32768, 32767]. + We use scaling here to follow the approach in Kaldi. + + Args: + filename: + Filename of a sound file. + Returns: + Return a 1-D tensor containing audio samples. + """ + with sf.SoundFile(filename) as sf_desc: + sampling_rate = sf_desc.samplerate + assert sampling_rate == 16000 + data = sf_desc.read(dtype=np.float32, always_2d=False) + data *= 32768 + return torch.from_numpy(data) + + +def test_fbank(): + device = torch.device("cpu") + if torch.cuda.is_available(): + device = torch.device("cuda", 0) + + wave0 = read_wave("test_data/test.wav") + wave1 = read_wave("test_data/test2.wav") + + wave0 = wave0.to(device) + wave1 = wave1.to(device) + + opts = kaldifeat.FbankOptions() + opts.frame_opts.dither = 0 + opts.device = device + + fbank = kaldifeat.Fbank(opts) + + # We can compute fbank features in batches + features = fbank([wave0, wave1]) + assert isinstance(features, list), f"{type(features)}" + assert len(features) == 2 + + # We can also compute fbank features for a single wave + features0 = fbank(wave0) + features1 = fbank(wave1) + + assert torch.allclose(features[0], features0) + assert torch.allclose(features[1], features1) + + # To compute fbank features for only a specified frame + audio_frames = fbank.convert_samples_to_frames(wave0) + feature_frame_1 = fbank.compute(audio_frames[1]) + feature_frame_10 = fbank.compute(audio_frames[10]) + + assert torch.allclose(features0[1], feature_frame_1) + assert torch.allclose(features0[10], feature_frame_10) + + +if __name__ == "__main__": + test_fbank() diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..2ec9ca6 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,104 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +import re + +import sphinx_rtd_theme + +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = "kaldifeat" +copyright = "2021, Fangjun Kuang" +author = "Fangjun Kuang" + + +def get_version(): + cmake_file = "../../CMakeLists.txt" + with open(cmake_file) as f: + content = f.read() + + version = re.search(r"set\(kaldifeat_VERSION (.*)\)", content).group(1) + return version.strip('"') + + +version = get_version() +release = version + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "recommonmark", + "sphinx.ext.autodoc", + "sphinx.ext.githubpages", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", + "sphinx_rtd_theme", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} +master_doc = "index" + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_show_sourcelink = True + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +pygments_style = "sphinx" + +numfig = True + +html_context = { + "display_github": True, + "github_user": "csukuangfj", + "github_repo": "kaldifeat", + "github_version": "master", + "conf_py_path": "/kaldifeat/docs/source/", +} + +# refer to +# https://sphinx-rtd-theme.readthedocs.io/en/latest/configuring.html +html_theme_options = { + "logo_only": False, + "display_version": True, + "prev_next_buttons_location": "bottom", + "style_external_links": True, +} diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 0000000..436e1b8 --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,24 @@ +.. kaldifeat documentation master file, created by + sphinx-quickstart on Fri Jul 16 20:15:27 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +kaldifeat +========= + +`kaldifeat `_ implements +feature extraction algorithms **compatible** with kaldi using PyTorch, supporting CUDA +as well as autograd. + +Currently, only fbank features are supported. +It can produce the same feature output as ``compute-fbank-feats`` (from kaldi) +when given the same options. + + + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + installation + usage diff --git a/doc/source/installation.rst b/doc/source/installation.rst new file mode 100644 index 0000000..9e4bfc8 --- /dev/null +++ b/doc/source/installation.rst @@ -0,0 +1,54 @@ +Installation +============ + +.. _from source: + +Install kaldifeat from source +----------------------------- + +You have to install ``cmake`` and ``PyTorch`` first. + + - ``cmake`` 3.11 is known to work. Other CMake versions may also work. + - ``PyTorch`` 1.8.1 is known to work. Other PyTorch versions may also work. + - Python >= 3.6 + + +The commands to install ``kaldifeat`` from source are: + +.. code-block:: bash + + git clone https://github.com/csukuangfj/kaldifeat + cd kaldifeat + python3 setup.py install + +To test that you have installed ``kaldifeat`` successfully, please run: + +.. code-block:: bash + + python3 -c "import kaldifeat; print(kaldifeat.__version__)" + +It should print the version, e.g., ``1.0``. + +Install kaldifeat from PyPI +--------------------------- + +The pre-built ``kaldifeat`` hosted on PyPI uses PyTorch 1.8.1. +If you install ``kaldifeat`` using pip, it will replace your locally +installed PyTorch automatically with PyTorch 1.8.1. + +If you don't want this happen, please `Install kaldifeat from source`_. + +The command to install ``kaldifeat`` from PyPI is: + +.. code-block:: bash + + pip install kaldifeat + + +To test that you have installed ``kaldifeat`` successfully, please run: + +.. code-block:: bash + + python3 -c "import kaldifeat; print(kaldifeat.__version__)" + +It should print the version, e.g., ``1.0``. diff --git a/doc/source/usage.rst b/doc/source/usage.rst new file mode 100644 index 0000000..dd6a770 --- /dev/null +++ b/doc/source/usage.rst @@ -0,0 +1,212 @@ +Usage +===== + +Let us first see the help message of kaldi's ``compute-fbank-feats``: + +.. code-block:: bash + + $ compute-fbank-feats + + Create Mel-filter bank (FBANK) feature files. + Usage: compute-fbank-feats [options...] + + Options: + --allow-downsample : If true, allow the input waveform to have a higher frequency than the specified --sample-frequency (and we'll downsample). (bool, default = false) + --allow-upsample : If true, allow the input waveform to have a lower frequency than the specified --sample-frequency (and we'll upsample). (bool, default = false) + --blackman-coeff : Constant coefficient for generalized Blackman window. (float, default = 0.42) + --channel : Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right) (int, default = -1) + --debug-mel : Print out debugging information for mel bin computation (bool, default = false) + --dither : Dithering constant (0.0 means no dither). If you turn this off, you should set the --energy-floor option, e.g. to 1.0 or 0.1 (float, default = 1) + --energy-floor : Floor on energy (absolute, not relative) in FBANK computation. Only makes a difference if --use-energy=true; only necessary if --dither=0.0. Suggested values: 0.1 or 1.0 (float, default = 0) + --frame-length : Frame length in milliseconds (float, default = 25) + --frame-shift : Frame shift in milliseconds (float, default = 10) + --high-freq : High cutoff frequency for mel bins (if <= 0, offset from Nyquist) (float, default = 0) + --htk-compat : If true, put energy last. Warning: not sufficient to get HTK compatible features (need to change other parameters). (bool, default = false) + --low-freq : Low cutoff frequency for mel bins (float, default = 20) + --max-feature-vectors : Memory optimization. If larger than 0, periodically remove feature vectors so that only this number of the latest feature vectors is retained. (int, default = -1) + --min-duration : Minimum duration of segments to process (in seconds). (float, default = 0) + --num-mel-bins : Number of triangular mel-frequency bins (int, default = 23) + --output-format : Format of the output files [kaldi, htk] (string, default = "kaldi") + --preemphasis-coefficient : Coefficient for use in signal preemphasis (float, default = 0.97) + --raw-energy : If true, compute energy before preemphasis and windowing (bool, default = true) + --remove-dc-offset : Subtract mean from waveform on each frame (bool, default = true) + --round-to-power-of-two : If true, round window size to power of two by zero-padding input to FFT. (bool, default = true) + --sample-frequency : Waveform data sample frequency (must match the waveform file, if specified there) (float, default = 16000) + --snip-edges : If true, end effects will be handled by outputting only frames that completely fit in the file, and the number of frames depends on the frame-length. If false, the number of frames depends only on the frame-shift, and we reflect the data at the ends. (bool, default = true) + --subtract-mean : Subtract mean of each feature file [CMS]; not recommended to do it this way. (bool, default = false) + --use-energy : Add an extra dimension with energy to the FBANK output. (bool, default = false) + --use-log-fbank : If true, produce log-filterbank, else produce linear. (bool, default = true) + --use-power : If true, use power, else use magnitude. (bool, default = true) + --utt2spk : Utterance to speaker-id map (if doing VTLN and you have warps per speaker) (string, default = "") + --vtln-high : High inflection point in piecewise linear VTLN warping function (if negative, offset from high-mel-freq (float, default = -500) + --vtln-low : Low inflection point in piecewise linear VTLN warping function (float, default = 100) + --vtln-map : Map from utterance or speaker-id to vtln warp factor (rspecifier) (string, default = "") + --vtln-warp : Vtln warp factor (only applicable if vtln-map not specified) (float, default = 1) + --window-type : Type of window ("hamming"|"hanning"|"povey"|"rectangular"|"sine"|"blackmann") (string, default = "povey") + --write-utt2dur : Wspecifier to write duration of each utterance in seconds, e.g. 'ark,t:utt2dur'. (string, default = "") + + Standard options: + --config : Configuration file to read (this option may be repeated) (string, default = "") + --help : Print out usage message (bool, default = false) + --print-args : Print the command line arguments (to stderr) (bool, default = true) + --verbose : Verbose level (higher->more logging) (int, default = 0) + +FbankOptions +------------ + +``kaldifeat`` reuses the same options from kaldi's ``compute-fbank-feats``. + +The following shows the default values of ``kaldifeat.FbankOptions``: + +.. code-block:: python + + >>> import kaldifeat + >>> fbank_opts = kaldifeat.FbankOptions() + >>> print(fbank_opts) + frame_opts: + samp_freq: 16000 + frame_shift_ms: 10 + frame_length_ms: 25 + dither: 1 + preemph_coeff: 0.97 + remove_dc_offset: 1 + window_type: povey + round_to_power_of_two: 1 + blackman_coeff: 0.42 + snip_edges: 1 + + + mel_opts: + num_bins: 23 + low_freq: 20 + high_freq: 0 + vtln_low: 100 + vtln_high: -500 + debug_mel: 0 + htk_mode: 0 + + use_energy: 0 + energy_floor: 0 + raw_energy: 1 + htk_compat: 0 + use_log_fbank: 1 + use_power: 1 + device: cpu + +It consists of three parts: + + - ``frame_opts`` + + Options in this part are accessed by ``frame_opts.xxx``. That is, to access + the sample rate, you use: + + .. code-block:: python + + >>> fbank_opts = kaldifeat.FbankOptions() + >>> print(fbank_opts.frame_opts.samp_freq) + 16000.0 + + - ``mel_opts`` + + Options in this part are accessed by ``mel_opts.xxx``. That is, to access + the number of mel bins, you use: + + .. code-block:: python + + >>> fbank_opts = kaldifeat.FbankOptions() + >>> print(fbank_opts.mel_opts.num_bins) + 23 + + - fbank related + + Options in this part are accessed directly. That is, to access the device + field, you use: + + .. code-block:: + + >>> print(fbank_opts.device) + cpu + >>> fbank_opts.device = 'cuda:0' + >>> print(fbank_opts.device) + cuda:0 + >>> import torch + >>> fbank_opts.device = torch.device('cuda', 0) + >>> print(fbank_opts.device) + cuda:0 + + + +To change the sample rate to 8000, you can use: + +.. code-block:: python + + >>> fbank_opts = kaldifeat.FbankOptions() + >>> print(fbank_opts.frame_opts.samp_freq) + 16000.0 + >>> fbank_opts.frame_opts.samp_freq = 8000 + >>> print(fbank_opts.frame_opts.samp_freq) + 8000.0 + +To change ``snip_edges`` to ``False``, you can use: + +.. code-block:: python + + >>> fbank_opts.frame_opts.snip_edges = False + >>> print(fbank_opts.frame_opts.snip_edges) + False + +To change number of mel bins to 80, you can use: + +.. code-block:: python + + >>> print(fbank_opts.mel_opts.num_bins) + 23 + >>> fbank_opts.mel_opts.num_bins = 80 + >>> print(fbank_opts.mel_opts.num_bins) + 80 + +To change the device to ``cuda``, you can use: + + +Fbank +----- + +The following shows how to use ``kaldifeat.Fbank`` to compute +the fbank features of sound files. + +First, let us generate two sound files using ``sox``: + +.. code-block:: bash + + # generate a wav of two seconds, containing a sine-wave + # swept from 300 Hz to 3300 Hz + sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300 + + # another sound file with 0.5 seconds + sox -n -r 16000 -b 16 test2.wav synth 0.5 sine 300-3300 + +.. hint:: + + You can find the above two files by visiting the following two links: + + - `test.wav `_ + - `test2.wav `_ + +The `following code `_ +shows the usage of ``kaldifeat.Fbank``. + +It shows: + + - How to read a sound file. Note that audio samples are scaled to the range [-32768, 32768]. + The intention is to produce the same output as kaldi. You don't need to scale it if + you don't care about the compatibility with kaldi + + - ``kaldifeat.Fbank`` supports CUDA as well as CPU + + - ``kaldifeat.Fbank`` supports processing sound file in a batch as well as accepting + a single sound file + + +.. literalinclude:: ./code/test_fbank.py + :caption: Demo of ``kaldifeat.Fbank`` + :language: python diff --git a/kaldifeat/python/kaldifeat/__init__.py b/kaldifeat/python/kaldifeat/__init__.py index e177288..f86941f 100644 --- a/kaldifeat/python/kaldifeat/__init__.py +++ b/kaldifeat/python/kaldifeat/__init__.py @@ -1,3 +1,4 @@ +import torch from _kaldifeat import FbankOptions, FrameExtractionOptions, MelBanksOptions from .fbank import Fbank diff --git a/scripts/github_actions/install_cuda.sh b/scripts/github_actions/install_cuda.sh new file mode 100755 index 0000000..7d023b9 --- /dev/null +++ b/scripts/github_actions/install_cuda.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# +# Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +echo "cuda version: $cuda" + +case "$cuda" in + 10.0) + url=https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux + ;; + 10.1) + # WARNING: there are bugs in + # https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.105_418.39_linux.run + # with GCC 7. Please use the following version + url=http://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.243_418.87.00_linux.run + ;; + 10.2) + url=http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run + ;; + 11.0) + url=http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda_11.0.2_450.51.05_linux.run + ;; + 11.1) + # url=https://developer.download.nvidia.com/compute/cuda/11.1.0/local_installers/cuda_11.1.0_455.23.05_linux.run + url=https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda_11.1.1_455.32.00_linux.run + ;; + *) + echo "Unknown cuda version: $cuda" + exit 1 + ;; +esac + +function retry() { + $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) +} + +retry curl -LSs -O $url +filename=$(basename $url) +echo "filename: $filename" +chmod +x ./$filename +sudo ./$filename --toolkit --silent +rm -fv ./$filename + +export CUDA_HOME=/usr/local/cuda +export PATH=$CUDA_HOME/bin:$PATH +export LD_LIBRARY_PATH=$CUDA_HOME/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH diff --git a/scripts/github_actions/install_cudnn.sh b/scripts/github_actions/install_cudnn.sh new file mode 100755 index 0000000..853eba5 --- /dev/null +++ b/scripts/github_actions/install_cudnn.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# +# Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +case $cuda in + 10.0) + filename=cudnn-10.0-linux-x64-v7.6.5.32.tgz + url=http://www.mediafire.com/file/1037lb1vmj9qdtq/cudnn-10.0-linux-x64-v7.6.5.32.tgz/file + ;; + 10.1) + filename=cudnn-10.1-linux-x64-v8.0.2.39.tgz + url=http://www.mediafire.com/file/fnl2wg0h757qhd7/cudnn-10.1-linux-x64-v8.0.2.39.tgz/file + ;; + 10.2) + filename=cudnn-10.2-linux-x64-v8.0.2.39.tgz + url=http://www.mediafire.com/file/sc2nvbtyg0f7ien/cudnn-10.2-linux-x64-v8.0.2.39.tgz/file + ;; + 11.0) + filename=cudnn-11.0-linux-x64-v8.0.5.39.tgz + url=https://www.mediafire.com/file/abyhnls106ko9kp/cudnn-11.0-linux-x64-v8.0.5.39.tgz/file + ;; + 11.1) + filename=cudnn-11.1-linux-x64-v8.0.5.39.tgz + url=https://www.mediafire.com/file/qx55zd65773xonv/cudnn-11.1-linux-x64-v8.0.5.39.tgz/file + ;; + *) + echo "Unsupported cuda version: $cuda" + exit 1 + ;; +esac + +function retry() { + $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) +} + +# It is forked from https://github.com/Juvenal-Yescas/mediafire-dl +# https://github.com/Juvenal-Yescas/mediafire-dl/pull/2 changes the filename and breaks the CI. +# We use a separate fork to keep the link fixed. +retry wget https://raw.githubusercontent.com/csukuangfj/mediafire-dl/master/mediafire_dl.py + +sed -i 's/quiet=False/quiet=True/' mediafire_dl.py +retry python3 mediafire_dl.py "$url" +sudo tar xf ./$filename -C /usr/local +rm -v ./$filename + +sudo sed -i '59i#define CUDNN_MAJOR 8' /usr/local/cuda/include/cudnn.h diff --git a/scripts/github_actions/install_torch.sh b/scripts/github_actions/install_torch.sh new file mode 100755 index 0000000..3ad1717 --- /dev/null +++ b/scripts/github_actions/install_torch.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# +# Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +case ${torch} in + 1.5.*) + case ${cuda} in + 10.1) + package="torch==${torch}+cu101" + url=https://download.pytorch.org/whl/torch_stable.html + ;; + 10.2) + package="torch==${torch}" + # Leave url empty to use PyPI. + # torch_stable provides cu92 but we want cu102 + url= + ;; + esac + ;; + 1.6.0) + case ${cuda} in + 10.1) + package="torch==1.6.0+cu101" + url=https://download.pytorch.org/whl/torch_stable.html + ;; + 10.2) + package="torch==1.6.0" + # Leave it empty to use PyPI. + # torch_stable provides cu92 but we want cu102 + url= + ;; + esac + ;; + 1.7.*) + case ${cuda} in + 10.1) + package="torch==${torch}+cu101" + url=https://download.pytorch.org/whl/torch_stable.html + ;; + 10.2) + package="torch==${torch}" + # Leave it empty to use PyPI. + # torch_stable provides cu92 but we want cu102 + url= + ;; + 11.0) + package="torch==${torch}+cu110" + url=https://download.pytorch.org/whl/torch_stable.html + ;; + esac + ;; + 1.8.*) + case ${cuda} in + 10.1) + package="torch==${torch}+cu101" + url=https://download.pytorch.org/whl/torch_stable.html + ;; + 10.2) + package="torch==${torch}" + # Leave it empty to use PyPI. + url= + ;; + 11.1) + package="torch==${torch}+cu111" + url=https://download.pytorch.org/whl/torch_stable.html + ;; + esac + ;; + 1.9.0) + case ${cuda} in + 10.2) + package="torch==${torch}" + # Leave it empty to use PyPI. + url= + ;; + 11.1) + package="torch==${torch}+cu111" + url=https://download.pytorch.org/whl/torch_stable.html + ;; + esac + ;; + *) + echo "Unsupported PyTorch version: ${torch}" + exit 1 + ;; +esac + +function retry() { + $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) +} + +if [ x"${url}" == "x" ]; then + retry python3 -m pip install -q $package +else + retry python3 -m pip install -q $package -f $url +fi