21static unsigned _spec_N = 0;
22static double *_spec_in = NULL;
23static fftw_complex *_spec_out = NULL;
24static fftw_plan _spec_plan = NULL;
33static double *_stft_win = NULL;
34static unsigned _stft_win_N = 0;
43static unsigned _mel_N = 0;
44static unsigned _mel_num_mels = 0;
45static double _mel_sample_rate = 0.0;
46static double _mel_min_freq_hz = 0.0;
47static double _mel_max_freq_hz = 0.0;
48static double *_mel_fb = NULL;
51#define MD_F0_FFT_PROMINENCE_RATIO 2.5
53#define MD_F0_FFT_GLOBAL_RATIO 0.2
55#define MD_MFCC_LOG_FLOOR 1e-12
61 double denom = y_left - 2.0 * y_mid + y_right;
62 if (fabs(denom) < 1e-12)
return 0.0;
64 double delta = 0.5 * (y_left - y_right) / denom;
65 if (delta < -0.5) delta = -0.5;
66 if (delta > 0.5) delta = 0.5;
73 return 2595.0 * log10(1.0 + hz / 700.0);
79 return 700.0 * (pow(10.0, mel / 2595.0) - 1.0);
85 if (_spec_out) fftw_free(_spec_out);
86 if (_spec_in) free(_spec_in);
94 if (_spec_plan) fftw_destroy_plan(_spec_plan);
105 if (_spec_N == N)
return;
110 _spec_in = calloc(N,
sizeof(
double));
111 _spec_out = fftw_alloc_complex(N / 2 + 1);
113 _spec_plan = fftw_plan_dft_r2c_1d(
114 (
int)N, _spec_in, _spec_out,
115 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
133 _mel_sample_rate = 0.0;
134 _mel_min_freq_hz = 0.0;
135 _mel_max_freq_hz = 0.0;
139static void _mel_setup(
unsigned N,
double sample_rate,
unsigned num_mels,
140 double min_freq_hz,
double max_freq_hz)
142 double nyquist = 0.5 * sample_rate;
143 double f_lo = fmax(0.0, min_freq_hz);
144 double f_hi = fmin(max_freq_hz, nyquist);
148 && _mel_num_mels == num_mels
149 && _mel_sample_rate == sample_rate
150 && _mel_min_freq_hz == f_lo
151 && _mel_max_freq_hz == f_hi) {
158 _mel_num_mels = num_mels;
159 _mel_sample_rate = sample_rate;
160 _mel_min_freq_hz = f_lo;
161 _mel_max_freq_hz = f_hi;
163 unsigned num_bins = N / 2 + 1;
164 size_t fb_len = (size_t)num_mels * num_bins;
165 _mel_fb = calloc(fb_len,
sizeof(
double));
168 if (f_hi <= f_lo)
return;
170 unsigned num_points = num_mels + 2;
171 double *mel_points = malloc(num_points *
sizeof(
double));
172 double *hz_points = malloc(num_points *
sizeof(
double));
178 if (mel_max <= mel_min) {
184 double mel_step = (mel_max - mel_min) / (
double)(num_mels + 1);
185 for (
unsigned i = 0; i < num_points; i++) {
186 mel_points[i] = mel_min + mel_step * (double)i;
190 for (
unsigned m = 0; m < num_mels; m++) {
191 double f_left = hz_points[m];
192 double f_center = hz_points[m + 1];
193 double f_right = hz_points[m + 2];
194 if (f_center <= f_left || f_right <= f_center) {
198 for (
unsigned k = 0; k < num_bins; k++) {
199 double f_bin = (double)k * sample_rate / (
double)N;
201 if (f_bin > f_left && f_bin < f_right) {
202 if (f_bin <= f_center) {
203 w = (f_bin - f_left) / (f_center - f_left);
205 w = (f_right - f_bin) / (f_right - f_center);
208 if (w < 0.0) w = 0.0;
209 if (w > 1.0) w = 1.0;
210 _mel_fb[(size_t)m * num_bins + k] = w;
225 if (_stft_win_N == N)
return;
228 _stft_win = malloc(N *
sizeof(
double));
286 memcpy(_spec_in, signal, N *
sizeof(
double));
289 fftw_execute(_spec_plan);
292 unsigned num_bins = N / 2 + 1;
293 for (
unsigned k = 0; k < num_bins; k++) {
294 mag_out[k] = cabs(_spec_out[k]);
324 memcpy(_spec_in, signal, N *
sizeof(
double));
327 fftw_execute(_spec_plan);
332 unsigned num_bins = N / 2 + 1;
333 for (
unsigned k = 0; k < num_bins; k++) {
334 double re = creal(_spec_out[k]);
335 double im = cimag(_spec_out[k]);
336 psd_out[k] = (re * re + im * im) / (
double)N;
366 memcpy(_spec_in, signal, N *
sizeof(
double));
369 fftw_execute(_spec_plan);
373 unsigned num_bins = N / 2 + 1;
374 for (
unsigned k = 0; k < num_bins; k++) {
375 phase_out[k] = carg(_spec_out[k]);
381 double min_freq_hz,
double max_freq_hz)
389 unsigned k_min = (unsigned)ceil(min_freq_hz * (
double)N / sample_rate);
390 unsigned k_max = (unsigned)floor(max_freq_hz * (
double)N / sample_rate);
392 if (k_min < 1) k_min = 1;
393 if (k_max > N / 2) k_max = N / 2;
394 if (k_min > k_max)
return 0.0;
396 if (
MD_energy(signal, N) == 0.0)
return 0.0;
400 for (
unsigned n = 0; n < N; n++) {
401 _spec_in[n] = signal[n] * _stft_win[n];
403 fftw_execute(_spec_plan);
405 double global_max = 0.0;
406 for (
unsigned k = 1; k <= N / 2; k++) {
407 double mag = cabs(_spec_out[k]);
408 if (mag > global_max) global_max = mag;
411 double sum_mag = 0.0;
412 double best_mag = -DBL_MAX;
414 unsigned num_bins = 0;
416 for (
unsigned k = k_min; k <= k_max; k++) {
417 double mag = cabs(_spec_out[k]);
420 if (mag > best_mag) {
426 if (best_k == 0 || best_mag <= 0.0 || num_bins == 0)
return 0.0;
428 double mean_mag = sum_mag / (double)num_bins;
429 if (mean_mag <= 0.0)
return 0.0;
431 if (global_max <= 0.0)
return 0.0;
434 double k_est = (double)best_k;
435 if (best_k > 0 && best_k < N / 2) {
436 double m_left = cabs(_spec_out[best_k - 1]);
437 double m_mid = cabs(_spec_out[best_k]);
438 double m_right = cabs(_spec_out[best_k + 1]);
442 if (k_est < (
double)k_min) k_est = (double)k_min;
443 if (k_est > (
double)k_max) k_est = (double)k_max;
445 return k_est * sample_rate / (double)N;
456 if (signal_len < N)
return 0;
457 return (signal_len - N) / hop + 1;
479void MD_stft(
const double *signal,
unsigned signal_len,
480 unsigned N,
unsigned hop,
double *mag_out)
488 if (num_frames == 0)
return;
491 unsigned num_bins = N / 2 + 1;
493 for (
unsigned f = 0; f < num_frames; f++) {
494 const double *src = signal + (size_t)f * hop;
498 for (
unsigned n = 0; n < N; n++) {
499 _spec_in[n] = src[n] * _stft_win[n];
502 fftw_execute(_spec_plan);
507 double *row = mag_out + (size_t)f * num_bins;
508 for (
unsigned k = 0; k < num_bins; k++) {
509 row[k] = cabs(_spec_out[k]);
516 double min_freq_hz,
double max_freq_hz,
517 double *filterbank_out)
525 _mel_setup(N, sample_rate, num_mels, min_freq_hz, max_freq_hz);
527 unsigned num_bins = N / 2 + 1;
528 size_t fb_len = (size_t)num_mels * num_bins;
529 memcpy(filterbank_out, _mel_fb, fb_len *
sizeof(
double));
533 double sample_rate,
unsigned num_mels,
534 double min_freq_hz,
double max_freq_hz,
545 _mel_setup(N, sample_rate, num_mels, min_freq_hz, max_freq_hz);
547 for (
unsigned n = 0; n < N; n++) {
548 _spec_in[n] = signal[n] * _stft_win[n];
550 fftw_execute(_spec_plan);
552 unsigned num_bins = N / 2 + 1;
553 for (
unsigned m = 0; m < num_mels; m++) {
557 for (
unsigned k = 0; k < num_bins; k++) {
558 double re = creal(_spec_out[k]);
559 double im = cimag(_spec_out[k]);
560 double psd = (re * re + im * im) / (
double)N;
562 for (
unsigned m = 0; m < num_mels; m++) {
563 mel_out[m] += _mel_fb[(size_t)m * num_bins + k] * psd;
573 double cutoff_hz,
double sample_rate)
581 unsigned num_bins = len / 2 + 1;
584 double *in_buf = malloc(len *
sizeof(
double));
585 fftw_complex *freq = fftw_alloc_complex(num_bins);
589 memcpy(in_buf, signal, len *
sizeof(
double));
592 fftw_plan fwd = fftw_plan_dft_r2c_1d(
593 (
int)len, in_buf, freq, FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
594 fftw_plan inv = fftw_plan_dft_c2r_1d(
595 (
int)len, freq, signal, FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
603 unsigned cutoff_bin = (unsigned)(cutoff_hz * (
double)len / sample_rate);
604 for (
unsigned k = cutoff_bin + 1; k < num_bins; k++) {
612 for (
unsigned i = 0; i < len; i++) {
613 signal[i] /= (double)len;
616 fftw_destroy_plan(inv);
617 fftw_destroy_plan(fwd);
624 unsigned num_mels,
unsigned num_coeffs,
625 double min_freq_hz,
double max_freq_hz,
636 double *mel = malloc(num_mels *
sizeof(
double));
637 double *log_mel = malloc(num_mels *
sizeof(
double));
642 min_freq_hz, max_freq_hz, mel);
644 for (
unsigned m = 0; m < num_mels; m++) {
648 for (
unsigned c = 0; c < num_coeffs; c++) {
649 double norm = (c == 0)
650 ? sqrt(1.0 / (
double)num_mels)
651 : sqrt(2.0 / (
double)num_mels);
653 for (
unsigned m = 0; m < num_mels; m++) {
654 double angle = M_PI * (double)c * ((
double)m + 0.5)
656 sum += log_mel[m] * cos(angle);
658 mfcc_out[c] = norm * sum;
A mini library of DSP (Digital Signal Processing) routines.
@ MD_ERR_INVALID_SIZE
A size or count argument is invalid (e.g.
@ MD_ERR_INVALID_RANGE
A range or bound is invalid (e.g.
@ MD_ERR_ALLOC_FAILED
A memory allocation failed.
@ MD_ERR_NULL_POINTER
A required pointer argument is NULL.
void MD_Gen_Hann_Win(double *out, unsigned n)
Generate a Hanning (Hann) window of length n.
double MD_energy(const double *a, unsigned N)
Compute signal energy: sum of squared samples.
void md_gcc_teardown(void)
Tear down the GCC-PHAT FFT cache and reset its cached length.
Internal header for cross-file dependencies within the minidsp module.
void md_steg_teardown(void)
Tear down the BFSK sine carrier cache.
#define MD_CHECK(cond, code, msg, retval)
Check a precondition in a function that returns a value.
#define MD_CHECK_VOID(cond, code, msg)
Check a precondition in a void function.
static void _stft_setup(unsigned N)
Ensure the shared r2c plan is ready for length N, and that the STFT Hanning window buffer matches N.
void MD_power_spectral_density(const double *signal, unsigned N, double *psd_out)
Compute the power spectral density (PSD) of a real-valued signal.
static double md_hz_to_mel(double hz)
HTK mel scale conversion: Hz -> mel.
void MD_mel_energies(const double *signal, unsigned N, double sample_rate, unsigned num_mels, double min_freq_hz, double max_freq_hz, double *mel_out)
Compute mel-band energies from a single frame.
void MD_mfcc(const double *signal, unsigned N, double sample_rate, unsigned num_mels, unsigned num_coeffs, double min_freq_hz, double max_freq_hz, double *mfcc_out)
Compute mel-frequency cepstral coefficients (MFCCs) from a single frame.
static double md_parabolic_offset(double y_left, double y_mid, double y_right)
Three-point parabolic refinement around a discrete peak index.
void MD_phase_spectrum(const double *signal, unsigned N, double *phase_out)
Compute the one-sided phase spectrum of a real-valued signal.
double MD_f0_fft(const double *signal, unsigned N, double sample_rate, double min_freq_hz, double max_freq_hz)
Estimate the fundamental frequency (F0) using FFT peak picking.
#define MD_MFCC_LOG_FLOOR
Floor before logarithm in MFCC log-mel compression.
static double md_mel_to_hz(double mel)
HTK mel scale conversion: mel -> Hz.
static void _spec_free(void)
Free spectrum analysis buffers.
void MD_stft(const double *signal, unsigned signal_len, unsigned N, unsigned hop, double *mag_out)
Compute the Short-Time Fourier Transform (STFT) magnitude matrix.
static void _stft_teardown(void)
Free the cached STFT Hanning window.
static void _spec_teardown(void)
Tear down spectrum analysis plan and buffers.
static void _mel_setup(unsigned N, double sample_rate, unsigned num_mels, double min_freq_hz, double max_freq_hz)
Build the cached mel filterbank for the requested parameters.
static void _spec_setup(unsigned N)
Ensure the spectrum FFT cache matches the requested length.
void MD_lowpass_brickwall(double *signal, unsigned len, double cutoff_hz, double sample_rate)
Apply a brickwall lowpass filter to a signal in-place.
void MD_shutdown(void)
Free all internally cached FFT plans and buffers.
#define MD_F0_FFT_GLOBAL_RATIO
Candidate must also be a meaningful fraction of the frame's global peak.
void MD_magnitude_spectrum(const double *signal, unsigned N, double *mag_out)
Compute the magnitude spectrum of a real-valued signal.
static void _mel_teardown(void)
Free mel filterbank cache.
void MD_mel_filterbank(unsigned N, double sample_rate, unsigned num_mels, double min_freq_hz, double max_freq_hz, double *filterbank_out)
Build a mel-spaced triangular filterbank matrix.
unsigned MD_stft_num_frames(unsigned signal_len, unsigned N, unsigned hop)
Compute the number of complete STFT frames for the given parameters.
#define MD_F0_FFT_PROMINENCE_RATIO
F0-FFT peak must stand out from the average in-range spectrum.