mirror of
https://github.com/csukuangfj/kaldifeat.git
synced 2025-08-09 18:12:17 +00:00
Merge pull request #4 from csukuangfj/doc
Update README and release v1.2
This commit is contained in:
commit
2b55159caa
@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
|
||||
|
||||
project(kaldifeat)
|
||||
|
||||
set(kaldifeat_VERSION "1.1")
|
||||
set(kaldifeat_VERSION "1.2")
|
||||
|
||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
|
||||
|
125
README.md
125
README.md
@ -1,11 +1,121 @@
|
||||
# kaldifeat
|
||||
|
||||
Feature extraction compatible with kaldi using PyTorch, supporting
|
||||
CUDA, batch process, and autograd.
|
||||
Feature extraction compatible with `Kaldi` using PyTorch, supporting
|
||||
CUDA, batch processing, chunk processing, and autograd.
|
||||
|
||||
See [colab notebook][1] for example usage.
|
||||
|
||||
[1]: https://colab.research.google.com/drive/16LXJ5QlqO0mQRYK4q1NxOBdM2yL0CYNJ#scrollTo=dTYi4IVJro0j
|
||||
# Usage
|
||||
|
||||
Let us first generate a test wave using sox:
|
||||
|
||||
```bash
|
||||
# generate a wave of 1.2 seconds, containing a sine-wave
|
||||
# swept from 300 Hz to 3300 Hz
|
||||
sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
|
||||
```
|
||||
|
||||
**HINT**: Download [test_wav][test_wav].
|
||||
|
||||
[test_wav]: kaldifeat/python/tests/test_data/test.wav
|
||||
|
||||
## Fbank
|
||||
|
||||
```python
|
||||
import torchaudio
|
||||
|
||||
import kaldifeat
|
||||
|
||||
filename = "./test.wav"
|
||||
wave, samp_freq = torchaudio.load(filename)
|
||||
|
||||
wave = wave.squeeze()
|
||||
|
||||
opts = kaldifeat.FbankOptions()
|
||||
opts.frame_opts.dither = 0
|
||||
# Yes, it has same options like `Kaldi`
|
||||
|
||||
fbank = kaldifeat.Fbank(opts)
|
||||
features = fbank(wave)
|
||||
```
|
||||
|
||||
To compute features that are compatible with `Kaldi`, wave samples have to be
|
||||
scaled to the range `[-32768, 32768]`. WARNING: You don't have to do this if
|
||||
you don't care about the compatibility with `Kaldi`
|
||||
|
||||
The following is an example:
|
||||
|
||||
```python
|
||||
wave *= 32768
|
||||
fbank = kaldifeat.Fbank(opts)
|
||||
features = fbank(wave)
|
||||
print(features[:3])
|
||||
```
|
||||
|
||||
The output is:
|
||||
|
||||
```
|
||||
tensor([[15.0074, 21.1730, 25.5286, 24.4644, 16.6994, 13.8480, 11.2087, 11.7952,
|
||||
10.3911, 10.4491, 10.3012, 9.8743, 9.6997, 9.3751, 9.3476, 9.3559,
|
||||
9.1074, 9.0032, 9.0312, 8.8399, 9.0822, 8.7442, 8.4023],
|
||||
[13.8785, 20.5647, 25.4956, 24.6966, 16.9541, 13.9163, 11.3364, 11.8449,
|
||||
10.2565, 10.5871, 10.3484, 9.7474, 9.6123, 9.3964, 9.0695, 9.1177,
|
||||
8.9136, 8.8425, 8.5920, 8.8315, 8.6226, 8.8605, 8.9763],
|
||||
[13.9475, 19.9410, 25.4494, 24.9051, 17.0004, 13.9207, 11.6667, 11.8217,
|
||||
10.3411, 10.7258, 10.0983, 9.8109, 9.6762, 9.4218, 9.1246, 8.7744,
|
||||
9.0863, 8.7488, 8.4695, 8.6710, 8.7728, 8.7405, 8.9824]])
|
||||
```
|
||||
|
||||
You can compute the fbank feature for the same wave with `Kaldi` using the following commands:
|
||||
|
||||
```bash
|
||||
echo "1 test.wav" > test.scp
|
||||
compute-fbank-feats --dither=0 scp:test.scp ark,t:test.txt
|
||||
head -n4 test.txt
|
||||
```
|
||||
|
||||
The output is:
|
||||
|
||||
```
|
||||
1 [
|
||||
15.00744 21.17303 25.52861 24.46438 16.69938 13.84804 11.2087 11.79517 10.3911 10.44909 10.30123 9.874329 9.699727 9.37509 9.347578 9.355928 9.107419 9.00323 9.031268 8.839916 9.082197 8.744139 8.40221
|
||||
13.87853 20.56466 25.49562 24.69662 16.9541 13.91633 11.33638 11.84495 10.25656 10.58718 10.34841 9.747416 9.612316 9.39642 9.06955 9.117751 8.913527 8.842571 8.59212 8.831518 8.622513 8.86048 8.976251
|
||||
13.94753 19.94101 25.4494 24.90511 17.00044 13.92074 11.66673 11.82172 10.34108 10.72575 10.09829 9.810879 9.676199 9.421767 9.124647 8.774353 9.086291 8.74897 8.469534 8.670973 8.772754 8.740549 8.982433
|
||||
```
|
||||
|
||||
You can see that ``kaldifeat`` produces the same output as `Kaldi` (within some tolerance due to numerical precision).
|
||||
|
||||
|
||||
**HINT**: Download [test_scp][test_scp] and [test_txt][test_txt].
|
||||
|
||||
[test_scp]: kaldifeat/python/tests/test_data/test.scp
|
||||
[test_txt]: kaldifeat/python/tests/test_data/test.txt
|
||||
|
||||
|
||||
To use GPU, you can use:
|
||||
|
||||
```python
|
||||
import torch
|
||||
|
||||
opts = kaldifeat.FbankOptions()
|
||||
opts.device = torch.device("cuda", 0)
|
||||
|
||||
fbank = kaldifeat.Fbank(opts)
|
||||
features = fbank(wave.to(opts.device))
|
||||
```
|
||||
|
||||
## MFCC
|
||||
|
||||
To compute MFCC features, please replace `kaldifeat.FbankOptions` and `kaldifeat.Fbank`
|
||||
with `kaldifeat.MfccOptions` and `kaldifeat.Mfcc`, respectively.
|
||||
|
||||
Please refer to [kaldifeat/python/tests/test_fbank.py](kaldifeat/python/tests/test_fbank.py)
|
||||
and [kaldifeat/python/tests/test_mfcc.py](kaldifeat/python/tests/test_mfcc.py)
|
||||
for more examples.
|
||||
|
||||
**HINT**: In the examples, you can find that
|
||||
|
||||
- ``kaldifeat`` supports batch processing as well as chunk processing
|
||||
- ``kaldifeat`` uses the same options as `Kaldi`'s `compute-fbank-feats` and `compute-mfcc-feats`
|
||||
|
||||
# Installation
|
||||
|
||||
@ -34,11 +144,6 @@ python setup.py install
|
||||
```
|
||||
|
||||
To test whether `kaldifeat` was installed successfully, you can run:
|
||||
```
|
||||
```bash
|
||||
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Please refer to <https://kaldifeat.readthedocs.io/en/latest/usage.html>
|
||||
for how to use `kaldifeat`.
|
||||
|
@ -6,7 +6,7 @@ KALDI_ROOT=/root/fangjun/open-source/kaldi
|
||||
export PATH=${KALDI_ROOT}/src/featbin:$PATH
|
||||
|
||||
if [ ! -f test.wav ]; then
|
||||
# generate a wav of two seconds, containing a sine-wave
|
||||
# generate a wav of 1.2 seconds, containing a sine-wave
|
||||
# swept from 300 Hz to 3300 Hz
|
||||
sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300
|
||||
fi
|
||||
|
Loading…
x
Reference in New Issue
Block a user