kaldifeat/kaldifeat/csrc/feature-mfcc.h
2021-07-18 19:46:14 +08:00

120 lines
3.3 KiB
C++

// kaldifeat/csrc/feature-mfcc.h
//
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
// This file is copied/modified from kaldi/src/feat/feature-mfcc.h
#ifndef KALDIFEAT_CSRC_FEATURE_MFCC_H_
#define KALDIFEAT_CSRC_FEATURE_MFCC_H_
#include <map>
#include <string>
#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
#include "kaldifeat/csrc/mel-computations.h"
#include "torch/torch.h"
namespace kaldifeat {
/// MfccOptions contains basic options for computing MFCC features.
// (this class is copied from kaldi)
struct MfccOptions {
FrameExtractionOptions frame_opts;
MelBanksOptions mel_opts;
// Number of cepstra in MFCC computation (including C0)
int32_t num_ceps = 13;
// Use energy (not C0) in MFCC computation
bool use_energy = true;
// Floor on energy (absolute, not relative) in MFCC
// computation. Only makes a difference if use_energy=true;
// only necessary if dither=0.0.
// Suggested values: 0.1 or 1.0
float energy_floor = 0.0;
// If true, compute energy before preemphasis and windowing
bool raw_energy = true;
// Constant that controls scaling of MFCCs
float cepstral_lifter = 22.0;
// If true, put energy or C0 last and use a factor of
// sqrt(2) on C0.
// Warning: not sufficient to get HTK compatible features
// (need to change other parameters)
bool htk_compat = false;
torch::Device device{"cpu"};
MfccOptions() { mel_opts.num_bins = 23; }
std::string ToString() const {
std::ostringstream os;
os << "frame_opts: \n";
os << frame_opts << "\n";
os << "\n";
os << "mel_opts: \n";
os << mel_opts << "\n";
os << "num_ceps: " << num_ceps << "\n";
os << "use_energy: " << use_energy << "\n";
os << "energy_floor: " << energy_floor << "\n";
os << "raw_energy: " << raw_energy << "\n";
os << "cepstral_lifter: " << cepstral_lifter << "\n";
os << "htk_compat: " << htk_compat << "\n";
os << "device: " << device << "\n";
return os.str();
}
};
std::ostream &operator<<(std::ostream &os, const MfccOptions &opts);
class MfccComputer {
public:
using Options = MfccOptions;
explicit MfccComputer(const MfccOptions &opts);
~MfccComputer();
MfccComputer &operator=(const MfccComputer &) = delete;
MfccComputer(const MfccComputer &) = delete;
int32_t Dim() const { return opts_.num_ceps; }
bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
const FrameExtractionOptions &GetFrameOptions() const {
return opts_.frame_opts;
}
const MfccOptions &GetOptions() const { return opts_; }
// signal_raw_log_energy is log_energy_pre_window, which is not empty
// iff NeedRawLogEnergy() returns true.
torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
const torch::Tensor &signal_frame);
private:
const MelBanks *GetMelBanks(float vtln_warp);
MfccOptions opts_;
torch::Tensor lifter_coeffs_; // 1-D tensor
// Note we save a transposed version of dct_matrix_
// dct_matrix_.rows is num_mel_bins
// dct_matrix_.cols is num_ceps
torch::Tensor dct_matrix_; // matrix we right-multiply by to perform DCT.
float log_energy_floor_;
std::map<float, MelBanks *> mel_banks_; // float is VTLN coefficient.
};
using Mfcc = OfflineFeatureTpl<MfccComputer>;
} // namespace kaldifeat
#endif // KALDIFEAT_CSRC_FEATURE_MFCC_H_