Init pitch functions

This commit is contained in:
Feiteng 2024-01-24 17:36:25 +08:00
parent 7ae06d78eb
commit 5fca61b1d5
11 changed files with 305 additions and 2 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ __pycache__/
test-1hour.wav
path.sh
torch_version.py
*.DS_Store

View File

@ -7,6 +7,7 @@ set(kaldifeat_srcs
feature-plp.cc
feature-spectrogram.cc
feature-window.cc
pitch-functions.cc
matrix-functions.cc
mel-computations.cc
online-feature.cc

View File

@ -0,0 +1,14 @@
// kaldifeat/csrc/pitch-functions.cc
//
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
// This file is copied/modified from kaldi/src/feat/pitch-functions.cc
#include "kaldifeat/csrc/pitch-functions.h"
#include <cmath>
namespace kaldifeat {
} // namespace kaldifeat

View File

@ -20,7 +20,8 @@
#include <string>
#include "torch/script.h"
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
namespace kaldifeat {
@ -144,10 +145,49 @@ struct PitchExtractionOptions {
os << "nccf_ballast_online: " << nccf_ballast_online << "\n";
os << "snip_edges: " << snip_edges << "\n";
os << "device: " << device << "\n";
return os.str();
}
};
// Stream-insertion overload for PitchExtractionOptions. Only the declaration
// appears here; no definition is visible in this part of the file.
// TODO(fangjun): Implement it
std::ostream &operator<<(std::ostream &os, const PitchExtractionOptions &opts);
// Computer class for pitch features. It is used as the template argument of
// OfflineFeatureTpl (see the `Pitch` alias right after the class).
//
// Most members are only declared here; their definitions are expected to live
// in the corresponding .cc file.
class PitchComputer {
 public:
  using Options = PitchExtractionOptions;

  // Constructs a computer from `opts`.
  explicit PitchComputer(const PitchExtractionOptions &opts);
  ~PitchComputer();

  // Copying is disabled.
  PitchComputer &operator=(const PitchComputer &) = delete;
  PitchComputer(const PitchComputer &) = delete;

  // Feature dimension; always 2.
  int32_t Dim() const { return 2; }

  // if true, compute log_energy_pre_window but after dithering and dc removal
  bool NeedRawLogEnergy() const;

  // Returns the frame extraction options used by this computer.
  //
  // Const-qualified like the other accessors of this class so that it can
  // also be called through a const PitchComputer.
  const FrameExtractionOptions &GetFrameOptions() const;

  // Returns the options this computer was created with.
  const PitchExtractionOptions &GetOptions() const { return opts_; }

  // Computes the features from `signal_frame`.
  // NOTE(review): the exact meaning of `signal_raw_log_energy` and
  // `vtln_warp` for pitch is not visible here -- confirm against the
  // definition.
  torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
                        const torch::Tensor &signal_frame);

 private:
  PitchExtractionOptions opts_;
};

// Offline pitch feature extractor built on top of PitchComputer.
using Pitch = OfflineFeatureTpl<PitchComputer>;
} // namespace kaldifeat

View File

@ -5,6 +5,7 @@ pybind11_add_module(_kaldifeat
feature-plp.cc
feature-spectrogram.cc
feature-window.cc
pitch-functions.cc
kaldifeat.cc
mel-computations.cc
online-feature.cc

View File

@ -0,0 +1,133 @@
// kaldifeat/python/csrc/pitch-functions.cc
//
// Copyright (c) 2024 (authors: Feiteng Li)
#include "kaldifeat/python/csrc/pitch-functions.h"
#include <memory>
#include <string>
#include "kaldifeat/csrc/pitch-functions.h"
#include "kaldifeat/python/csrc/utils.h"
namespace kaldifeat {
// Registers the Python class `PitchOptions` (a binding of
// PitchExtractionOptions) on module `m`.
//
// The class can be created either with no arguments or with keyword
// arguments; the defaults visible from Python are the ones given in the
// py::arg(...) list. Every field is exposed as a read/write attribute,
// `device` is exposed as a property that converts to/from `torch.device`, and
// dict conversion and pickling go through AsDict() and
// PitchOptionsFromDict().
static void PybindPitchOptions(py::module &m) {
  using PyClass = PitchExtractionOptions;

  py::class_<PyClass>(m, "PitchOptions")
      .def(py::init<>())
      .def(py::init([](float samp_freq, float frame_shift_ms,
                       float frame_length_ms, float preemph_coeff,
                       float min_f0, float max_f0, float soft_min_f0,
                       float penalty_factor, float lowpass_cutoff,
                       float resample_freq, float delta_pitch,
                       float nccf_ballast, int32_t lowpass_filter_width,
                       int32_t upsample_filter_width,
                       int32_t max_frames_latency, int32_t frames_per_chunk,
                       bool simulate_first_pass_online,
                       int32_t recompute_frame, bool nccf_ballast_online,
                       bool snip_edges, py::object device)
                        -> std::unique_ptr<PitchExtractionOptions> {
             auto ans = std::make_unique<PitchExtractionOptions>();

             ans->samp_freq = samp_freq;
             ans->frame_shift_ms = frame_shift_ms;
             ans->frame_length_ms = frame_length_ms;
             ans->preemph_coeff = preemph_coeff;

             ans->min_f0 = min_f0;
             ans->max_f0 = max_f0;
             ans->soft_min_f0 = soft_min_f0;
             ans->penalty_factor = penalty_factor;
             ans->lowpass_cutoff = lowpass_cutoff;
             ans->resample_freq = resample_freq;
             ans->delta_pitch = delta_pitch;
             ans->nccf_ballast = nccf_ballast;

             ans->lowpass_filter_width = lowpass_filter_width;
             ans->upsample_filter_width = upsample_filter_width;
             ans->max_frames_latency = max_frames_latency;
             ans->frames_per_chunk = frames_per_chunk;
             ans->simulate_first_pass_online = simulate_first_pass_online;
             ans->recompute_frame = recompute_frame;
             ans->nccf_ballast_online = nccf_ballast_online;
             ans->snip_edges = snip_edges;

             // Accept anything whose str() names a device, e.g. "cpu" or a
             // torch.device object.
             ans->device = torch::Device(std::string(py::str(device)));

             return ans;
           }),
           py::arg("samp_freq") = 16000,
           py::arg("frame_shift_ms") = 10.0,
           py::arg("frame_length_ms") = 25.0,
           py::arg("preemph_coeff") = 0.0,
           py::arg("min_f0") = 50,
           py::arg("max_f0") = 400,
           py::arg("soft_min_f0") = 10.0,
           py::arg("penalty_factor") = 0.1,
           py::arg("lowpass_cutoff") = 1000,
           py::arg("resample_freq") = 4000,
           py::arg("delta_pitch") = 0.005,
           py::arg("nccf_ballast") = 7000,
           py::arg("lowpass_filter_width") = 1,
           py::arg("upsample_filter_width") = 5,
           py::arg("max_frames_latency") = 0,
           py::arg("frames_per_chunk") = 0,
           py::arg("simulate_first_pass_online") = false,
           py::arg("recompute_frame") = 500,
           py::arg("nccf_ballast_online") = false,
           py::arg("snip_edges") = true,
           py::arg("device") = py::str("cpu"))
      .def_readwrite("samp_freq", &PyClass::samp_freq)
      .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
      .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
      .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
      .def_readwrite("min_f0", &PyClass::min_f0)
      .def_readwrite("max_f0", &PyClass::max_f0)
      .def_readwrite("soft_min_f0", &PyClass::soft_min_f0)
      .def_readwrite("penalty_factor", &PyClass::penalty_factor)
      .def_readwrite("lowpass_cutoff", &PyClass::lowpass_cutoff)
      .def_readwrite("resample_freq", &PyClass::resample_freq)
      .def_readwrite("delta_pitch", &PyClass::delta_pitch)
      .def_readwrite("nccf_ballast", &PyClass::nccf_ballast)
      .def_readwrite("lowpass_filter_width", &PyClass::lowpass_filter_width)
      .def_readwrite("upsample_filter_width",
                     &PyClass::upsample_filter_width)
      .def_readwrite("max_frames_latency", &PyClass::max_frames_latency)
      .def_readwrite("frames_per_chunk", &PyClass::frames_per_chunk)
      .def_readwrite("simulate_first_pass_online",
                     &PyClass::simulate_first_pass_online)
      .def_readwrite("recompute_frame", &PyClass::recompute_frame)
      .def_readwrite("nccf_ballast_online", &PyClass::nccf_ballast_online)
      .def_readwrite("snip_edges", &PyClass::snip_edges)
      .def_property(
          "device",
          // Getter: hand the device back to Python as a torch.device.
          [](const PyClass &self) -> py::object {
            return py::module_::import("torch").attr("device")(
                self.device.str());
          },
          // Setter: go through str() so that both strings and torch.device
          // objects are accepted.
          [](PyClass &self, py::object obj) -> void {
            self.device = torch::Device(std::string(py::str(obj)));
          })
      .def("__str__",
           [](const PyClass &self) -> std::string { return self.ToString(); })
      .def("as_dict",
           [](const PyClass &self) -> py::dict { return AsDict(self); })
      .def_static("from_dict",
                  [](py::dict dict) -> PyClass {
                    return PitchOptionsFromDict(dict);
                  })
      .def(py::pickle(
          // __getstate__
          [](const PyClass &self) -> py::dict { return AsDict(self); },
          // __setstate__
          [](py::dict dict) -> PyClass {
            return PitchOptionsFromDict(dict);
          }));
}
// Registers the Python class `Pitch` (a binding of kaldifeat::Pitch) on
// module `m`.
//
// The object is pickled as the dict form of its options and rebuilt from
// that dict on unpickling.
static void PybindPitch(py::module &m) {
  using PyClass = Pitch;

  py::class_<PyClass> pitch(m, "Pitch");

  pitch.def(py::init<const PitchOptions &>(), py::arg("opts"));
  pitch.def("dim", &PyClass::Dim);
  pitch.def_property_readonly("options", &PyClass::GetOptions);

  // The GIL is released for the duration of the call.
  pitch.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
            py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>());

  pitch.def(py::pickle(
      // __getstate__
      [](const PyClass &self) -> py::dict {
        return AsDict(self.GetOptions());
      },
      // __setstate__
      [](py::dict dict) -> std::unique_ptr<PyClass> {
        return std::make_unique<PyClass>(PitchOptionsFromDict(dict));
      }));
}
// Entry point for the pitch bindings: registers `PitchOptions` first and then
// `Pitch` on module `m`.
void PybindPitchFunctions(py::module &m) {
  PybindPitchOptions(m);
  PybindPitch(m);
}
} // namespace kaldifeat

View File

@ -0,0 +1,16 @@
// kaldifeat/python/csrc/pitch-functions.h
//
// Copyright (c) 2024 (authors: Feiteng Li)
#ifndef KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
#define KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
#include "kaldifeat/python/csrc/kaldifeat.h"
namespace kaldifeat {
void PybindPitchFunctions(py::module &m);
} // namespace kaldifeat
#endif // KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_

View File

@ -152,6 +152,69 @@ py::dict AsDict(const WhisperFbankOptions &opts) {
return dict;
}
// Creates a PitchOptions from a Python dict.
//
// Each field is read through the FROM_DICT helper macro (defined elsewhere in
// this file; presumably it leaves the default in place when the key is
// absent -- confirm against the macro). The "device" entry is handled
// explicitly: when present, its str() form is parsed into a torch::Device.
PitchOptions PitchOptionsFromDict(py::dict dict) {
  PitchOptions opts;

  // float fields
  FROM_DICT(float_, samp_freq);
  FROM_DICT(float_, frame_shift_ms);
  FROM_DICT(float_, frame_length_ms);
  FROM_DICT(float_, preemph_coeff);
  FROM_DICT(float_, min_f0);
  FROM_DICT(float_, max_f0);
  FROM_DICT(float_, soft_min_f0);
  FROM_DICT(float_, penalty_factor);
  FROM_DICT(float_, lowpass_cutoff);
  FROM_DICT(float_, resample_freq);
  FROM_DICT(float_, delta_pitch);
  FROM_DICT(float_, nccf_ballast);

  // integer and boolean fields
  FROM_DICT(int_, lowpass_filter_width);
  FROM_DICT(int_, upsample_filter_width);
  FROM_DICT(int_, max_frames_latency);
  FROM_DICT(int_, frames_per_chunk);
  FROM_DICT(bool_, simulate_first_pass_online);
  FROM_DICT(int_, recompute_frame);
  FROM_DICT(bool_, nccf_ballast_online);
  FROM_DICT(bool_, snip_edges);

  // No device given: keep whatever the default-constructed options hold.
  if (!dict.contains("device")) {
    return opts;
  }

  const std::string device_str = std::string(py::str(dict["device"]));
  opts.device = torch::Device(device_str);
  return opts;
}
// Converts a PitchOptions into a Python dict.
//
// Every field is written through the AS_DICT helper macro (defined elsewhere
// in this file). The "device" entry is stored as a `torch.device` object
// built from the string form of opts.device.
py::dict AsDict(const PitchOptions &opts) {
  py::dict dict;

  // float fields
  AS_DICT(samp_freq);
  AS_DICT(frame_shift_ms);
  AS_DICT(frame_length_ms);
  AS_DICT(preemph_coeff);
  AS_DICT(min_f0);
  AS_DICT(max_f0);
  AS_DICT(soft_min_f0);
  AS_DICT(penalty_factor);
  AS_DICT(lowpass_cutoff);
  AS_DICT(resample_freq);
  AS_DICT(delta_pitch);
  AS_DICT(nccf_ballast);

  // integer and boolean fields
  AS_DICT(lowpass_filter_width);
  AS_DICT(upsample_filter_width);
  AS_DICT(max_frames_latency);
  AS_DICT(frames_per_chunk);
  AS_DICT(simulate_first_pass_online);
  AS_DICT(recompute_frame);
  AS_DICT(nccf_ballast_online);
  AS_DICT(snip_edges);

  dict["device"] =
      py::module_::import("torch").attr("device")(opts.device.str());

  return dict;
}
MfccOptions MfccOptionsFromDict(py::dict dict) {
MfccOptions opts;

View File

@ -10,6 +10,7 @@
#include "kaldifeat/csrc/feature-plp.h"
#include "kaldifeat/csrc/feature-spectrogram.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/pitch-functions.h"
#include "kaldifeat/csrc/mel-computations.h"
#include "kaldifeat/csrc/whisper-fbank.h"
#include "kaldifeat/python/csrc/kaldifeat.h"
@ -26,8 +27,12 @@
* all fields use their default values.
*/
namespace kaldifeat {
using PitchOptions = kaldifeat::PitchExtractionOptions;
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
py::dict AsDict(const FrameExtractionOptions &opts);
@ -40,6 +45,9 @@ py::dict AsDict(const FbankOptions &opts);
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
py::dict AsDict(const WhisperFbankOptions &opts);
PitchOptions PitchOptionsFromDict(py::dict dict);
py::dict AsDict(const PitchOptions &opts);
MfccOptions MfccOptionsFromDict(py::dict dict);
py::dict AsDict(const MfccOptions &opts);

View File

@ -15,6 +15,7 @@ from _kaldifeat import (
FrameExtractionOptions,
MelBanksOptions,
MfccOptions,
PitchOptions,
PlpOptions,
SpectrogramOptions,
WhisperFbankOptions,
@ -25,9 +26,21 @@ from .fbank import Fbank, OnlineFbank
from .mfcc import Mfcc, OnlineMfcc
from .offline_feature import OfflineFeature
from .online_feature import OnlineFeature
from .pitch import Pitch
from .plp import OnlinePlp, Plp
from .spectrogram import Spectrogram
from .whisper_fbank import WhisperFbank
# Path of the "share/cmake" directory located next to this file.
cmake_prefix_path = _Path(__file__).parent / "share" / "cmake"
del _Path  # keep the helper alias out of the package namespace

# NOTE(review): this assignment was repeated ten times with the same value,
# which looks like a build step appending it on every run -- confirm whether
# it should be committed at all. A single assignment has the same effect.
__version__ = '1.25.4.dev20240124+cpu.torch2.1.1'

View File

@ -0,0 +1,13 @@
# Copyright (c) 2024 (authors: Feiteng Li)
import _kaldifeat
from .offline_feature import OfflineFeature
class Pitch(OfflineFeature):
    """Offline pitch feature extractor backed by ``_kaldifeat.Pitch``."""

    def __init__(self, opts: _kaldifeat.PitchOptions):
        """
        Args:
          opts:
            Options used to construct the underlying ``_kaldifeat.Pitch``
            computer. They are also forwarded to ``OfflineFeature``.
        """
        super().__init__(opts)
        self.computer = _kaldifeat.Pitch(opts)