mirror of
https://github.com/csukuangfj/kaldifeat.git
synced 2025-08-09 01:52:39 +00:00
Merge 5fca61b1d5690d5470a2d20200c0cb67fd17d304 into 40cc0a4a2cf4b39d4c5405a55c7be14863287527
This commit is contained in:
commit
1f59a67f3a
1
.gitignore
vendored
1
.gitignore
vendored
@ -6,3 +6,4 @@ __pycache__/
|
|||||||
test-1hour.wav
|
test-1hour.wav
|
||||||
path.sh
|
path.sh
|
||||||
torch_version.py
|
torch_version.py
|
||||||
|
*.DS_Store
|
||||||
|
@ -7,6 +7,7 @@ set(kaldifeat_srcs
|
|||||||
feature-plp.cc
|
feature-plp.cc
|
||||||
feature-spectrogram.cc
|
feature-spectrogram.cc
|
||||||
feature-window.cc
|
feature-window.cc
|
||||||
|
pitch-functions.cc
|
||||||
matrix-functions.cc
|
matrix-functions.cc
|
||||||
mel-computations.cc
|
mel-computations.cc
|
||||||
online-feature.cc
|
online-feature.cc
|
||||||
|
14
kaldifeat/csrc/pitch-functions.cc
Normal file
14
kaldifeat/csrc/pitch-functions.cc
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
// kaldifeat/csrc/pitch-functions.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
// This file is copied/modified from kaldi/src/feat/pitch-functions.cc
|
||||||
|
|
||||||
|
#include "kaldifeat/csrc/pitch-functions.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
namespace kaldifeat {
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace kaldifeat
|
@ -20,7 +20,8 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "torch/script.h"
|
#include "kaldifeat/csrc/feature-common.h"
|
||||||
|
#include "kaldifeat/csrc/feature-window.h"
|
||||||
|
|
||||||
namespace kaldifeat {
|
namespace kaldifeat {
|
||||||
|
|
||||||
@ -144,10 +145,49 @@ struct PitchExtractionOptions {
|
|||||||
os << "nccf_ballast_online: " << nccf_ballast_online << "\n";
|
os << "nccf_ballast_online: " << nccf_ballast_online << "\n";
|
||||||
os << "snip_edges: " << snip_edges << "\n";
|
os << "snip_edges: " << snip_edges << "\n";
|
||||||
os << "device: " << device << "\n";
|
os << "device: " << device << "\n";
|
||||||
|
return os.str();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO(fangjun): Implement it
|
|
||||||
|
std::ostream &operator<<(std::ostream &os, const PitchExtractionOptions &opts);
|
||||||
|
|
||||||
|
class PitchComputer {
|
||||||
|
public:
|
||||||
|
using Options = PitchExtractionOptions;
|
||||||
|
|
||||||
|
explicit PitchComputer(const PitchExtractionOptions &opts);
|
||||||
|
~PitchComputer();
|
||||||
|
|
||||||
|
PitchComputer &operator=(const PitchComputer &) = delete;
|
||||||
|
PitchComputer(const PitchComputer &) = delete;
|
||||||
|
|
||||||
|
int32_t Dim() const {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// // if true, compute log_energy_pre_window but after dithering and dc removal
|
||||||
|
// bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
|
||||||
|
|
||||||
|
// if true, compute log_energy_pre_window but after dithering and dc removal
|
||||||
|
bool NeedRawLogEnergy() const;
|
||||||
|
|
||||||
|
// const FrameExtractionOptions &GetFrameOptions() const {
|
||||||
|
// return opts_.frame_opts;
|
||||||
|
// }
|
||||||
|
|
||||||
|
const FrameExtractionOptions &GetFrameOptions();
|
||||||
|
|
||||||
|
const PitchExtractionOptions &GetOptions() const { return opts_; }
|
||||||
|
|
||||||
|
torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
|
||||||
|
const torch::Tensor &signal_frame);
|
||||||
|
|
||||||
|
private:
|
||||||
|
PitchExtractionOptions opts_;
|
||||||
|
};
|
||||||
|
|
||||||
|
using Pitch = OfflineFeatureTpl<PitchComputer>;
|
||||||
|
|
||||||
} // namespace kaldifeat
|
} // namespace kaldifeat
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ pybind11_add_module(_kaldifeat
|
|||||||
feature-plp.cc
|
feature-plp.cc
|
||||||
feature-spectrogram.cc
|
feature-spectrogram.cc
|
||||||
feature-window.cc
|
feature-window.cc
|
||||||
|
pitch-functions.cc
|
||||||
kaldifeat.cc
|
kaldifeat.cc
|
||||||
mel-computations.cc
|
mel-computations.cc
|
||||||
online-feature.cc
|
online-feature.cc
|
||||||
|
133
kaldifeat/python/csrc/pitch-functions.cc
Normal file
133
kaldifeat/python/csrc/pitch-functions.cc
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
// kaldifeat/python/csrc/pitch-functions.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 (authors: Feiteng Li)
|
||||||
|
|
||||||
|
#include "kaldifeat/python/csrc/pitch-functions.h"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "kaldifeat/csrc/pitch-functions.h"
|
||||||
|
#include "kaldifeat/python/csrc/utils.h"
|
||||||
|
|
||||||
|
namespace kaldifeat {
|
||||||
|
|
||||||
|
// Exposes PitchExtractionOptions to Python under the name "PitchOptions":
// a default constructor, a keyword constructor, read/write access to every
// field, a `device` property, __str__, as_dict/from_dict and pickling.
static void PybindPitchOptions(py::module &m) {
  using PyClass = PitchExtractionOptions;

  // `device` is exchanged with Python as a torch.device object.
  auto device_getter = [](const PyClass &self) -> py::object {
    return py::module_::import("torch").attr("device")(self.device.str());
  };
  auto device_setter = [](PyClass &self, py::object obj) -> void {
    const std::string name = static_cast<py::str>(obj);
    self.device = torch::Device(name);
  };

  py::class_<PyClass>(m, "PitchOptions")
      .def(py::init<>())
      // Keyword constructor; the defaults visible from Python are the
      // py::arg(...) values listed after the lambda.
      .def(py::init([](float samp_freq, float frame_shift_ms,
                       float frame_length_ms, float preemph_coeff,
                       float min_f0, float max_f0, float soft_min_f0,
                       float penalty_factor, float lowpass_cutoff,
                       float resample_freq, float delta_pitch,
                       float nccf_ballast, int32_t lowpass_filter_width,
                       int32_t upsample_filter_width,
                       int32_t max_frames_latency, int32_t frames_per_chunk,
                       bool simulate_first_pass_online,
                       int32_t recompute_frame, bool nccf_ballast_online,
                       bool snip_edges, py::object device)
                        -> std::unique_ptr<PitchExtractionOptions> {
             auto ans = std::make_unique<PitchExtractionOptions>();

             ans->samp_freq = samp_freq;
             ans->frame_shift_ms = frame_shift_ms;
             ans->frame_length_ms = frame_length_ms;
             ans->preemph_coeff = preemph_coeff;
             ans->min_f0 = min_f0;
             ans->max_f0 = max_f0;
             ans->soft_min_f0 = soft_min_f0;
             ans->penalty_factor = penalty_factor;
             ans->lowpass_cutoff = lowpass_cutoff;
             ans->resample_freq = resample_freq;
             ans->delta_pitch = delta_pitch;
             ans->nccf_ballast = nccf_ballast;
             ans->lowpass_filter_width = lowpass_filter_width;
             ans->upsample_filter_width = upsample_filter_width;
             ans->max_frames_latency = max_frames_latency;
             ans->frames_per_chunk = frames_per_chunk;
             ans->simulate_first_pass_online = simulate_first_pass_online;
             ans->recompute_frame = recompute_frame;
             ans->nccf_ballast_online = nccf_ballast_online;
             ans->snip_edges = snip_edges;

             const std::string device_name = static_cast<py::str>(device);
             ans->device = torch::Device(device_name);

             return ans;
           }),
           py::arg("samp_freq") = 16000, py::arg("frame_shift_ms") = 10.0,
           py::arg("frame_length_ms") = 25.0, py::arg("preemph_coeff") = 0.0,
           py::arg("min_f0") = 50, py::arg("max_f0") = 400,
           py::arg("soft_min_f0") = 10.0, py::arg("penalty_factor") = 0.1,
           py::arg("lowpass_cutoff") = 1000, py::arg("resample_freq") = 4000,
           py::arg("delta_pitch") = 0.005, py::arg("nccf_ballast") = 7000,
           py::arg("lowpass_filter_width") = 1,
           py::arg("upsample_filter_width") = 5,
           py::arg("max_frames_latency") = 0, py::arg("frames_per_chunk") = 0,
           py::arg("simulate_first_pass_online") = false,
           py::arg("recompute_frame") = 500,
           py::arg("nccf_ballast_online") = false,
           py::arg("snip_edges") = true, py::arg("device") = py::str("cpu"))
      .def_readwrite("samp_freq", &PyClass::samp_freq)
      .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
      .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
      .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
      .def_readwrite("min_f0", &PyClass::min_f0)
      .def_readwrite("max_f0", &PyClass::max_f0)
      .def_readwrite("soft_min_f0", &PyClass::soft_min_f0)
      .def_readwrite("penalty_factor", &PyClass::penalty_factor)
      .def_readwrite("lowpass_cutoff", &PyClass::lowpass_cutoff)
      .def_readwrite("resample_freq", &PyClass::resample_freq)
      .def_readwrite("delta_pitch", &PyClass::delta_pitch)
      .def_readwrite("nccf_ballast", &PyClass::nccf_ballast)
      .def_readwrite("lowpass_filter_width", &PyClass::lowpass_filter_width)
      .def_readwrite("upsample_filter_width", &PyClass::upsample_filter_width)
      .def_readwrite("max_frames_latency", &PyClass::max_frames_latency)
      .def_readwrite("frames_per_chunk", &PyClass::frames_per_chunk)
      .def_readwrite("simulate_first_pass_online",
                     &PyClass::simulate_first_pass_online)
      .def_readwrite("recompute_frame", &PyClass::recompute_frame)
      .def_readwrite("nccf_ballast_online", &PyClass::nccf_ballast_online)
      .def_readwrite("snip_edges", &PyClass::snip_edges)
      .def_property("device", device_getter, device_setter)
      .def("__str__",
           [](const PyClass &self) -> std::string { return self.ToString(); })
      .def("as_dict",
           [](const PyClass &self) -> py::dict { return AsDict(self); })
      .def_static(
          "from_dict",
          [](py::dict dict) -> PyClass { return PitchOptionsFromDict(dict); })
      // Pickling round-trips through the same dict representation.
      .def(py::pickle(
          [](const PyClass &self) -> py::dict { return AsDict(self); },
          [](py::dict dict) -> PyClass { return PitchOptionsFromDict(dict); }));
}
|
||||||
|
|
||||||
|
// Exposes the offline pitch extractor to Python under the name "Pitch".
static void PybindPitch(py::module &m) {
  using PyClass = Pitch;

  py::class_<PyClass> binding(m, "Pitch");

  binding.def(py::init<const PitchOptions &>(), py::arg("opts"));
  binding.def("dim", &PyClass::Dim);
  binding.def_property_readonly("options", &PyClass::GetOptions);

  // The GIL is released while features are being computed.
  binding.def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"),
              py::arg("vtln_warp"), py::call_guard<py::gil_scoped_release>());

  // Only the options are pickled; the extractor is rebuilt from them.
  binding.def(py::pickle(
      [](const PyClass &self) -> py::dict { return AsDict(self.GetOptions()); },
      [](py::dict dict) -> std::unique_ptr<PyClass> {
        return std::make_unique<PyClass>(PitchOptionsFromDict(dict));
      }));
}
|
||||||
|
|
||||||
|
// Entry point of this file: registers "PitchOptions" first and then "Pitch"
// on the given module.
void PybindPitchFunctions(py::module &m) {
  PybindPitchOptions(m);
  PybindPitch(m);
}
|
||||||
|
|
||||||
|
} // namespace kaldifeat
|
16
kaldifeat/python/csrc/pitch-functions.h
Normal file
16
kaldifeat/python/csrc/pitch-functions.h
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
// kaldifeat/python/csrc/pitch-functions.h
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 (authors: Feiteng Li)
|
||||||
|
|
||||||
|
#ifndef KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
|
||||||
|
#define KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
|
||||||
|
|
||||||
|
#include "kaldifeat/python/csrc/kaldifeat.h"
|
||||||
|
|
||||||
|
namespace kaldifeat {
|
||||||
|
|
||||||
|
void PybindPitchFunctions(py::module &m);
|
||||||
|
|
||||||
|
} // namespace kaldifeat
|
||||||
|
|
||||||
|
#endif // KALDIFEAT_PYTHON_CSRC_PITCH_FUNCTIONS_H_
|
@ -152,6 +152,69 @@ py::dict AsDict(const WhisperFbankOptions &opts) {
|
|||||||
return dict;
|
return dict;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Builds a PitchOptions from a Python dict. Start from default-constructed
// options and apply one FROM_DICT per supported key.
// NOTE(review): FROM_DICT is defined elsewhere; it presumably skips keys that
// are absent from `dict` and relies on the local names `opts`/`dict` --
// confirm against the macro definition.
PitchOptions PitchOptionsFromDict(py::dict dict) {
  PitchOptions opts;

  // Floating-point fields.
  FROM_DICT(float_, samp_freq);
  FROM_DICT(float_, frame_shift_ms);
  FROM_DICT(float_, frame_length_ms);
  FROM_DICT(float_, preemph_coeff);
  FROM_DICT(float_, min_f0);
  FROM_DICT(float_, max_f0);
  FROM_DICT(float_, soft_min_f0);
  FROM_DICT(float_, penalty_factor);
  FROM_DICT(float_, lowpass_cutoff);
  FROM_DICT(float_, resample_freq);
  FROM_DICT(float_, delta_pitch);
  FROM_DICT(float_, nccf_ballast);

  // Integer and boolean fields.
  FROM_DICT(int_, lowpass_filter_width);
  FROM_DICT(int_, upsample_filter_width);
  FROM_DICT(int_, max_frames_latency);
  FROM_DICT(int_, frames_per_chunk);
  FROM_DICT(bool_, simulate_first_pass_online);
  FROM_DICT(int_, recompute_frame);
  FROM_DICT(bool_, nccf_ballast_online);
  FROM_DICT(bool_, snip_edges);

  // The device entry is converted through its string form.
  if (dict.contains("device")) {
    const std::string device_name = py::str(dict["device"]);
    opts.device = torch::Device(device_name);
  }

  return opts;
}
|
||||||
|
|
||||||
|
// Converts PitchOptions into a Python dict, the inverse of
// PitchOptionsFromDict. One AS_DICT per field; `device` is stored as a
// torch.device object.
// NOTE(review): AS_DICT is defined elsewhere and presumably relies on the
// local names `opts`/`dict` -- confirm against the macro definition.
py::dict AsDict(const PitchOptions &opts) {
  py::dict dict;

  // Floating-point fields.
  AS_DICT(samp_freq);
  AS_DICT(frame_shift_ms);
  AS_DICT(frame_length_ms);
  AS_DICT(preemph_coeff);
  AS_DICT(min_f0);
  AS_DICT(max_f0);
  AS_DICT(soft_min_f0);
  AS_DICT(penalty_factor);
  AS_DICT(lowpass_cutoff);
  AS_DICT(resample_freq);
  AS_DICT(delta_pitch);
  AS_DICT(nccf_ballast);

  // Integer and boolean fields.
  AS_DICT(lowpass_filter_width);
  AS_DICT(upsample_filter_width);
  AS_DICT(max_frames_latency);
  AS_DICT(frames_per_chunk);
  AS_DICT(simulate_first_pass_online);
  AS_DICT(recompute_frame);
  AS_DICT(nccf_ballast_online);
  AS_DICT(snip_edges);

  dict["device"] =
      py::module_::import("torch").attr("device")(opts.device.str());

  return dict;
}
|
||||||
|
|
||||||
|
|
||||||
MfccOptions MfccOptionsFromDict(py::dict dict) {
|
MfccOptions MfccOptionsFromDict(py::dict dict) {
|
||||||
MfccOptions opts;
|
MfccOptions opts;
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "kaldifeat/csrc/feature-plp.h"
|
#include "kaldifeat/csrc/feature-plp.h"
|
||||||
#include "kaldifeat/csrc/feature-spectrogram.h"
|
#include "kaldifeat/csrc/feature-spectrogram.h"
|
||||||
#include "kaldifeat/csrc/feature-window.h"
|
#include "kaldifeat/csrc/feature-window.h"
|
||||||
|
#include "kaldifeat/csrc/pitch-functions.h"
|
||||||
#include "kaldifeat/csrc/mel-computations.h"
|
#include "kaldifeat/csrc/mel-computations.h"
|
||||||
#include "kaldifeat/csrc/whisper-fbank.h"
|
#include "kaldifeat/csrc/whisper-fbank.h"
|
||||||
#include "kaldifeat/python/csrc/kaldifeat.h"
|
#include "kaldifeat/python/csrc/kaldifeat.h"
|
||||||
@ -26,8 +27,12 @@
|
|||||||
* all fields use their default values.
|
* all fields use their default values.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
namespace kaldifeat {
|
namespace kaldifeat {
|
||||||
|
|
||||||
|
using PitchOptions = kaldifeat::PitchExtractionOptions;
|
||||||
|
|
||||||
|
|
||||||
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
|
FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
|
||||||
py::dict AsDict(const FrameExtractionOptions &opts);
|
py::dict AsDict(const FrameExtractionOptions &opts);
|
||||||
|
|
||||||
@ -40,6 +45,9 @@ py::dict AsDict(const FbankOptions &opts);
|
|||||||
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
|
WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict);
|
||||||
py::dict AsDict(const WhisperFbankOptions &opts);
|
py::dict AsDict(const WhisperFbankOptions &opts);
|
||||||
|
|
||||||
|
PitchOptions PitchOptionsFromDict(py::dict dict);
|
||||||
|
py::dict AsDict(const PitchOptions &opts);
|
||||||
|
|
||||||
MfccOptions MfccOptionsFromDict(py::dict dict);
|
MfccOptions MfccOptionsFromDict(py::dict dict);
|
||||||
py::dict AsDict(const MfccOptions &opts);
|
py::dict AsDict(const MfccOptions &opts);
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ from _kaldifeat import (
|
|||||||
FrameExtractionOptions,
|
FrameExtractionOptions,
|
||||||
MelBanksOptions,
|
MelBanksOptions,
|
||||||
MfccOptions,
|
MfccOptions,
|
||||||
|
PitchOptions,
|
||||||
PlpOptions,
|
PlpOptions,
|
||||||
SpectrogramOptions,
|
SpectrogramOptions,
|
||||||
WhisperFbankOptions,
|
WhisperFbankOptions,
|
||||||
@ -25,9 +26,21 @@ from .fbank import Fbank, OnlineFbank
|
|||||||
from .mfcc import Mfcc, OnlineMfcc
|
from .mfcc import Mfcc, OnlineMfcc
|
||||||
from .offline_feature import OfflineFeature
|
from .offline_feature import OfflineFeature
|
||||||
from .online_feature import OnlineFeature
|
from .online_feature import OnlineFeature
|
||||||
|
from .pitch import Pitch
|
||||||
from .plp import OnlinePlp, Plp
|
from .plp import OnlinePlp, Plp
|
||||||
from .spectrogram import Spectrogram
|
from .spectrogram import Spectrogram
|
||||||
from .whisper_fbank import WhisperFbank
|
from .whisper_fbank import WhisperFbank
|
||||||
|
|
||||||
cmake_prefix_path = _Path(__file__).parent / "share" / "cmake"
|
cmake_prefix_path = _Path(__file__).parent / "share" / "cmake"
|
||||||
del _Path
|
del _Path
|
||||||
|
|
||||||
|
# NOTE(review): this assignment was repeated ten times with the same value;
# one is enough. The dev/+cpu.torch tag suggests a build artifact that was
# committed by accident -- confirm whether it belongs in the source tree.
__version__ = '1.25.4.dev20240124+cpu.torch2.1.1'
|
||||||
|
13
kaldifeat/python/kaldifeat/pitch.py
Normal file
13
kaldifeat/python/kaldifeat/pitch.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# Copyright (c) 2024 (authors: Feiteng Li)
|
||||||
|
|
||||||
|
|
||||||
|
import _kaldifeat
|
||||||
|
|
||||||
|
from .offline_feature import OfflineFeature
|
||||||
|
|
||||||
|
|
||||||
|
class Pitch(OfflineFeature):
    """Offline pitch feature extractor backed by ``_kaldifeat.Pitch``."""

    def __init__(self, opts: _kaldifeat.PitchOptions):
        """
        Args:
          opts:
            Options used to construct the underlying ``_kaldifeat.Pitch``
            computer.
        """
        super().__init__(opts)
        self.computer = _kaldifeat.Pitch(opts)
|
||||||
|
|
@ -23,6 +23,7 @@ set(py_test_files
|
|||||||
test_mel_bank_options.py
|
test_mel_bank_options.py
|
||||||
test_mfcc.py
|
test_mfcc.py
|
||||||
test_mfcc_options.py
|
test_mfcc_options.py
|
||||||
|
test_pitch.py
|
||||||
test_plp.py
|
test_plp.py
|
||||||
test_plp_options.py
|
test_plp_options.py
|
||||||
test_spectrogram.py
|
test_spectrogram.py
|
||||||
|
41
kaldifeat/python/tests/test_pitch.py
Executable file
41
kaldifeat/python/tests/test_pitch.py
Executable file
@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# Copyright 2021-2022 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from utils import get_devices, read_ark_txt, read_wave
|
||||||
|
|
||||||
|
import kaldifeat
|
||||||
|
|
||||||
|
cur_dir = Path(__file__).resolve().parent
|
||||||
|
|
||||||
|
|
||||||
|
def test_pitch_default():
    """Compare pitch features computed with default options against the
    reference values stored in ``test_data/test-pitch.txt``, on every device
    returned by ``get_devices()``.
    """
    print("=====test_pitch_default=====")
    filename = cur_dir / "test_data/test.wav"
    wave = read_wave(filename)
    gt = read_ark_txt(cur_dir / "test_data/test-pitch.txt")

    for device in get_devices():
        print("device", device)
        # PitchOptions is a flat options struct: its binding exposes no
        # `frame_opts` attribute, so there is no dither setting to disable.
        opts = kaldifeat.PitchOptions()
        opts.device = device
        pitch = kaldifeat.Pitch(opts)

        # A CPU input is expected to produce a CPU output.
        features = pitch(wave)
        assert features.device.type == "cpu"
        assert torch.allclose(features, gt, rtol=1e-4)

        # An input on `device` is expected to produce output on `device`.
        features = pitch(wave.to(device))
        assert features.device == device
        assert torch.allclose(features.cpu(), gt, rtol=1e-4)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_pitch_default()
|
Loading…
x
Reference in New Issue
Block a user