miniDSP
A small C library for audio DSP
Loading...
Searching...
No Matches
minidsp_spectrum.c
Go to the documentation of this file.
1
10#include "minidsp.h"
11#include "minidsp_internal.h"
12
13/* -----------------------------------------------------------------------
14 * Static (file-scope) variables for spectrum analysis caching
15 *
16 * These are separate from the GCC cache (in minidsp_gcc.c) so that
17 * spectrum analysis and delay estimation can be used independently
18 * without invalidating each other's plans.
19 * -----------------------------------------------------------------------*/
20
21static unsigned _spec_N = 0; /* Cached signal length */
22static double *_spec_in = nullptr; /* Local copy of input */
23static fftw_complex *_spec_out = nullptr; /* FFT output */
24static fftw_plan _spec_plan = nullptr; /* FFTW r2c plan */
25
26/* -----------------------------------------------------------------------
27 * Static variables for STFT Hanning window cache
28 *
29 * The STFT reuses the shared _spec_* r2c plan above. Only the Hanning
30 * window buffer is separate, since different callers may use different N.
31 * -----------------------------------------------------------------------*/
32
33static double *_stft_win = nullptr; /* Cached Hanning window */
34static unsigned _stft_win_N = 0; /* Window length corresponding to above */
35
36/* -----------------------------------------------------------------------
37 * Static variables for mel filterbank cache
38 *
39 * MFCC extraction is often called frame-by-frame with fixed parameters.
40 * Cache the triangular mel matrix to avoid rebuilding it on every frame.
41 * -----------------------------------------------------------------------*/
42
43static unsigned _mel_N = 0;
44static unsigned _mel_num_mels = 0;
45static double _mel_sample_rate = 0.0;
46static double _mel_min_freq_hz = 0.0;
47static double _mel_max_freq_hz = 0.0;
48static double *_mel_fb = nullptr; /* Row-major: [num_mels][N/2+1] */
49
51#define MD_F0_FFT_PROMINENCE_RATIO 2.5
53#define MD_F0_FFT_GLOBAL_RATIO 0.2
55#define MD_MFCC_LOG_FLOOR 1e-12
56
59static double md_parabolic_offset(double y_left, double y_mid, double y_right)
60{
61 double denom = y_left - 2.0 * y_mid + y_right;
62 if (fabs(denom) < 1e-12) return 0.0;
63
64 double delta = 0.5 * (y_left - y_right) / denom;
65 if (delta < -0.5) delta = -0.5;
66 if (delta > 0.5) delta = 0.5;
67 return delta;
68}
69
71static double md_hz_to_mel(double hz)
72{
73 return 2595.0 * log10(1.0 + hz / 700.0);
74}
75
77static double md_mel_to_hz(double mel)
78{
79 return 700.0 * (pow(10.0, mel / 2595.0) - 1.0);
80}
81
83static void _spec_free(void)
84{
85 if (_spec_out) fftw_free(_spec_out);
86 if (_spec_in) free(_spec_in);
87 _spec_out = nullptr;
88 _spec_in = nullptr;
89}
90
92static void _spec_teardown(void)
93{
94 if (_spec_plan) fftw_destroy_plan(_spec_plan);
95 _spec_plan = nullptr;
96 _spec_free();
97}
98
103static void _spec_setup(unsigned N)
104{
105 if (_spec_N == N) return;
106
108 _spec_N = N;
109
110 _spec_in = calloc(N, sizeof(double));
111 _spec_out = fftw_alloc_complex(N / 2 + 1);
112
113 _spec_plan = fftw_plan_dft_r2c_1d(
114 (int)N, _spec_in, _spec_out,
115 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
116}
117
119static void _stft_teardown(void)
120{
121 free(_stft_win);
122 _stft_win = nullptr;
123 _stft_win_N = 0;
124}
125
127static void _mel_teardown(void)
128{
129 free(_mel_fb);
130 _mel_fb = nullptr;
131 _mel_N = 0;
132 _mel_num_mels = 0;
133 _mel_sample_rate = 0.0;
134 _mel_min_freq_hz = 0.0;
135 _mel_max_freq_hz = 0.0;
136}
137
139static void _mel_setup(unsigned N, double sample_rate, unsigned num_mels,
140 double min_freq_hz, double max_freq_hz)
141{
142 double nyquist = 0.5 * sample_rate;
143 double f_lo = fmax(0.0, min_freq_hz);
144 double f_hi = fmin(max_freq_hz, nyquist);
145
146 if (_mel_fb != nullptr
147 && _mel_N == N
148 && _mel_num_mels == num_mels
149 && _mel_sample_rate == sample_rate
150 && _mel_min_freq_hz == f_lo
151 && _mel_max_freq_hz == f_hi) {
152 return;
153 }
154
156
157 _mel_N = N;
158 _mel_num_mels = num_mels;
159 _mel_sample_rate = sample_rate;
160 _mel_min_freq_hz = f_lo;
161 _mel_max_freq_hz = f_hi;
162
163 unsigned num_bins = N / 2 + 1;
164 size_t fb_len = (size_t)num_mels * num_bins;
165 _mel_fb = calloc(fb_len, sizeof(double));
166 assert(_mel_fb != nullptr);
167
168 if (f_hi <= f_lo) return;
169
170 unsigned num_points = num_mels + 2;
171 double *mel_points = malloc(num_points * sizeof(double));
172 double *hz_points = malloc(num_points * sizeof(double));
173 assert(mel_points != nullptr);
174 assert(hz_points != nullptr);
175
176 double mel_min = md_hz_to_mel(f_lo);
177 double mel_max = md_hz_to_mel(f_hi);
178 if (mel_max <= mel_min) {
179 free(hz_points);
180 free(mel_points);
181 return;
182 }
183
184 double mel_step = (mel_max - mel_min) / (double)(num_mels + 1);
185 for (unsigned i = 0; i < num_points; i++) {
186 mel_points[i] = mel_min + mel_step * (double)i;
187 hz_points[i] = md_mel_to_hz(mel_points[i]);
188 }
189
190 for (unsigned m = 0; m < num_mels; m++) {
191 double f_left = hz_points[m];
192 double f_center = hz_points[m + 1];
193 double f_right = hz_points[m + 2];
194 if (f_center <= f_left || f_right <= f_center) {
195 continue;
196 }
197
198 for (unsigned k = 0; k < num_bins; k++) {
199 double f_bin = (double)k * sample_rate / (double)N;
200 double w = 0.0;
201 if (f_bin > f_left && f_bin < f_right) {
202 if (f_bin <= f_center) {
203 w = (f_bin - f_left) / (f_center - f_left);
204 } else {
205 w = (f_right - f_bin) / (f_right - f_center);
206 }
207 }
208 if (w < 0.0) w = 0.0;
209 if (w > 1.0) w = 1.0;
210 _mel_fb[(size_t)m * num_bins + k] = w;
211 }
212 }
213
214 free(hz_points);
215 free(mel_points);
216}
217
222static void _stft_setup(unsigned N)
223{
224 _spec_setup(N); /* reuse shared r2c plan */
225 if (_stft_win_N == N) return; /* fast path: window already correct */
226
227 free(_stft_win);
228 _stft_win = malloc(N * sizeof(double));
229 assert(_stft_win != nullptr);
230 MD_Gen_Hann_Win(_stft_win, N);
231 _stft_win_N = N;
232}
233
234/* -----------------------------------------------------------------------
235 * Public API: resource cleanup
236 * -----------------------------------------------------------------------*/
237
238void MD_shutdown(void)
239{
244 _spec_N = 0;
245 fftw_cleanup();
246}
247
248/* -----------------------------------------------------------------------
249 * Public API: FFT / Spectrum Analysis
250 * -----------------------------------------------------------------------*/
251
276void MD_magnitude_spectrum(const double *signal, unsigned N, double *mag_out)
277{
278 assert(signal != nullptr);
279 assert(mag_out != nullptr);
280 assert(N >= 2);
281
282 _spec_setup(N);
283
284 /* Copy input into the local buffer (FFTW may overwrite it) */
285 memcpy(_spec_in, signal, N * sizeof(double));
286
287 /* Execute the forward FFT (real -> complex) */
288 fftw_execute(_spec_plan);
289
290 /* Compute magnitude |X(k)| = sqrt(re^2 + im^2) for each bin */
291 unsigned num_bins = N / 2 + 1;
292 for (unsigned k = 0; k < num_bins; k++) {
293 mag_out[k] = cabs(_spec_out[k]);
294 }
295}
296
314void MD_power_spectral_density(const double *signal, unsigned N, double *psd_out)
315{
316 assert(signal != nullptr);
317 assert(psd_out != nullptr);
318 assert(N >= 2);
319
320 _spec_setup(N);
321
322 /* Copy input into the local buffer (FFTW may overwrite it) */
323 memcpy(_spec_in, signal, N * sizeof(double));
324
325 /* Execute the forward FFT (real -> complex) */
326 fftw_execute(_spec_plan);
327
328 /* Compute PSD[k] = |X(k)|^2 / N = (re^2 + im^2) / N.
329 * We use creal/cimag instead of cabs to avoid a redundant sqrt --
330 * cabs computes sqrt(re^2 + im^2), and we'd just square it again. */
331 unsigned num_bins = N / 2 + 1;
332 for (unsigned k = 0; k < num_bins; k++) {
333 double re = creal(_spec_out[k]);
334 double im = cimag(_spec_out[k]);
335 psd_out[k] = (re * re + im * im) / (double)N;
336 }
337}
338
356void MD_phase_spectrum(const double *signal, unsigned N, double *phase_out)
357{
358 assert(signal != nullptr);
359 assert(phase_out != nullptr);
360 assert(N >= 2);
361
362 _spec_setup(N);
363
364 /* Copy input into the local buffer (FFTW may overwrite it) */
365 memcpy(_spec_in, signal, N * sizeof(double));
366
367 /* Execute the forward FFT (real -> complex) */
368 fftw_execute(_spec_plan);
369
370 /* Compute phi(k) = atan2(Im(X(k)), Re(X(k))) for each bin.
371 * carg() from <complex.h> does exactly this and returns [-pi, pi]. */
372 unsigned num_bins = N / 2 + 1;
373 for (unsigned k = 0; k < num_bins; k++) {
374 phase_out[k] = carg(_spec_out[k]);
375 }
376}
377
378double MD_f0_fft(const double *signal, unsigned N,
379 double sample_rate,
380 double min_freq_hz, double max_freq_hz)
381{
382 assert(signal != nullptr);
383 assert(N >= 2);
384 assert(sample_rate > 0.0);
385 assert(min_freq_hz > 0.0);
386 assert(max_freq_hz > min_freq_hz);
387
388 unsigned k_min = (unsigned)ceil(min_freq_hz * (double)N / sample_rate);
389 unsigned k_max = (unsigned)floor(max_freq_hz * (double)N / sample_rate);
390
391 if (k_min < 1) k_min = 1; /* skip DC */
392 if (k_max > N / 2) k_max = N / 2; /* one-sided Nyquist bound */
393 if (k_min > k_max) return 0.0;
394
395 if (MD_energy(signal, N) == 0.0) return 0.0;
396
397 /* Reuse the shared STFT Hann cache to window this frame. */
398 _stft_setup(N);
399 for (unsigned n = 0; n < N; n++) {
400 _spec_in[n] = signal[n] * _stft_win[n];
401 }
402 fftw_execute(_spec_plan);
403
404 double global_max = 0.0;
405 for (unsigned k = 1; k <= N / 2; k++) {
406 double mag = cabs(_spec_out[k]);
407 if (mag > global_max) global_max = mag;
408 }
409
410 double sum_mag = 0.0;
411 double best_mag = -DBL_MAX;
412 unsigned best_k = 0;
413 unsigned num_bins = 0;
414
415 for (unsigned k = k_min; k <= k_max; k++) {
416 double mag = cabs(_spec_out[k]);
417 sum_mag += mag;
418 num_bins++;
419 if (mag > best_mag) {
420 best_mag = mag;
421 best_k = k;
422 }
423 }
424
425 if (best_k == 0 || best_mag <= 0.0 || num_bins == 0) return 0.0;
426
427 double mean_mag = sum_mag / (double)num_bins;
428 if (mean_mag <= 0.0) return 0.0;
429 if (best_mag < MD_F0_FFT_PROMINENCE_RATIO * mean_mag) return 0.0;
430 if (global_max <= 0.0) return 0.0;
431 if (best_mag < MD_F0_FFT_GLOBAL_RATIO * global_max) return 0.0;
432
433 double k_est = (double)best_k;
434 if (best_k > 0 && best_k < N / 2) {
435 double m_left = cabs(_spec_out[best_k - 1]);
436 double m_mid = cabs(_spec_out[best_k]);
437 double m_right = cabs(_spec_out[best_k + 1]);
438 k_est += md_parabolic_offset(m_left, m_mid, m_right);
439 }
440
441 if (k_est < (double)k_min) k_est = (double)k_min;
442 if (k_est > (double)k_max) k_est = (double)k_max;
443
444 return k_est * sample_rate / (double)N;
445}
446
453unsigned MD_stft_num_frames(unsigned signal_len, unsigned N, unsigned hop)
454{
455 if (signal_len < N) return 0;
456 return (signal_len - N) / hop + 1;
457}
458
478void MD_stft(const double *signal, unsigned signal_len,
479 unsigned N, unsigned hop, double *mag_out)
480{
481 assert(signal != nullptr);
482 assert(mag_out != nullptr);
483 assert(N >= 2);
484 assert(hop >= 1);
485
486 unsigned num_frames = MD_stft_num_frames(signal_len, N, hop);
487 if (num_frames == 0) return; /* signal too short for even one frame */
488
489 _stft_setup(N);
490 unsigned num_bins = N / 2 + 1;
491
492 for (unsigned f = 0; f < num_frames; f++) {
493 const double *src = signal + (size_t)f * hop; /* (size_t) avoids overflow */
494
495 /* Apply Hanning window directly into the shared input buffer.
496 * Touching every element anyway, so no separate memcpy is needed. */
497 for (unsigned n = 0; n < N; n++) {
498 _spec_in[n] = src[n] * _stft_win[n];
499 }
500
501 fftw_execute(_spec_plan);
502
503 /* Write magnitude row: |X_f(k)| = sqrt(re^2 + im^2).
504 * cabs() is correct here -- we need the actual sqrt magnitude,
505 * not |z|^2, so the creal/cimag shortcut from PSD does not apply. */
506 double *row = mag_out + (size_t)f * num_bins;
507 for (unsigned k = 0; k < num_bins; k++) {
508 row[k] = cabs(_spec_out[k]);
509 }
510 }
511}
512
513void MD_mel_filterbank(unsigned N, double sample_rate,
514 unsigned num_mels,
515 double min_freq_hz, double max_freq_hz,
516 double *filterbank_out)
517{
518 assert(N >= 2);
519 assert(sample_rate > 0.0);
520 assert(num_mels > 0);
521 assert(min_freq_hz < max_freq_hz);
522 assert(filterbank_out != nullptr);
523
524 _mel_setup(N, sample_rate, num_mels, min_freq_hz, max_freq_hz);
525
526 unsigned num_bins = N / 2 + 1;
527 size_t fb_len = (size_t)num_mels * num_bins;
528 memcpy(filterbank_out, _mel_fb, fb_len * sizeof(double));
529}
530
531void MD_mel_energies(const double *signal, unsigned N,
532 double sample_rate, unsigned num_mels,
533 double min_freq_hz, double max_freq_hz,
534 double *mel_out)
535{
536 assert(signal != nullptr);
537 assert(mel_out != nullptr);
538 assert(N >= 2);
539 assert(sample_rate > 0.0);
540 assert(num_mels > 0);
541 assert(min_freq_hz < max_freq_hz);
542
543 _stft_setup(N);
544 _mel_setup(N, sample_rate, num_mels, min_freq_hz, max_freq_hz);
545
546 for (unsigned n = 0; n < N; n++) {
547 _spec_in[n] = signal[n] * _stft_win[n];
548 }
549 fftw_execute(_spec_plan);
550
551 unsigned num_bins = N / 2 + 1;
552 for (unsigned m = 0; m < num_mels; m++) {
553 mel_out[m] = 0.0;
554 }
555
556 for (unsigned k = 0; k < num_bins; k++) {
557 double re = creal(_spec_out[k]);
558 double im = cimag(_spec_out[k]);
559 double psd = (re * re + im * im) / (double)N;
560
561 for (unsigned m = 0; m < num_mels; m++) {
562 mel_out[m] += _mel_fb[(size_t)m * num_bins + k] * psd;
563 }
564 }
565}
566
567void MD_mfcc(const double *signal, unsigned N,
568 double sample_rate,
569 unsigned num_mels, unsigned num_coeffs,
570 double min_freq_hz, double max_freq_hz,
571 double *mfcc_out)
572{
573 assert(signal != nullptr);
574 assert(mfcc_out != nullptr);
575 assert(N >= 2);
576 assert(sample_rate > 0.0);
577 assert(num_mels > 0);
578 assert(num_coeffs >= 1 && num_coeffs <= num_mels);
579 assert(min_freq_hz < max_freq_hz);
580
581 double *mel = malloc(num_mels * sizeof(double));
582 double *log_mel = malloc(num_mels * sizeof(double));
583 assert(mel != nullptr);
584 assert(log_mel != nullptr);
585
586 MD_mel_energies(signal, N, sample_rate, num_mels,
587 min_freq_hz, max_freq_hz, mel);
588
589 for (unsigned m = 0; m < num_mels; m++) {
590 log_mel[m] = log(fmax(mel[m], MD_MFCC_LOG_FLOOR));
591 }
592
593 for (unsigned c = 0; c < num_coeffs; c++) {
594 double norm = (c == 0)
595 ? sqrt(1.0 / (double)num_mels)
596 : sqrt(2.0 / (double)num_mels);
597 double sum = 0.0;
598 for (unsigned m = 0; m < num_mels; m++) {
599 double angle = M_PI * (double)c * ((double)m + 0.5)
600 / (double)num_mels;
601 sum += log_mel[m] * cos(angle);
602 }
603 mfcc_out[c] = norm * sum;
604 }
605
606 free(log_mel);
607 free(mel);
608}
A mini library of DSP (Digital Signal Processing) routines.
void MD_Gen_Hann_Win(double *out, unsigned n)
Generate a Hanning (Hann) window of length n.
double MD_energy(const double *a, unsigned N)
Compute signal energy: sum of squared samples.
void md_gcc_teardown(void)
Tear down the GCC-PHAT FFT cache and reset its cached length.
Internal header for cross-file dependencies within the minidsp module.
static void _stft_setup(unsigned N)
Ensure the shared r2c plan is ready for length N, and that the STFT Hanning window buffer matches N.
void MD_power_spectral_density(const double *signal, unsigned N, double *psd_out)
Compute the power spectral density (PSD) of a real-valued signal.
static double md_hz_to_mel(double hz)
HTK mel scale conversion: Hz -> mel.
void MD_mel_energies(const double *signal, unsigned N, double sample_rate, unsigned num_mels, double min_freq_hz, double max_freq_hz, double *mel_out)
Compute mel-band energies from a single frame.
void MD_mfcc(const double *signal, unsigned N, double sample_rate, unsigned num_mels, unsigned num_coeffs, double min_freq_hz, double max_freq_hz, double *mfcc_out)
Compute mel-frequency cepstral coefficients (MFCCs) from a single frame.
static double md_parabolic_offset(double y_left, double y_mid, double y_right)
Three-point parabolic refinement around a discrete peak index.
void MD_phase_spectrum(const double *signal, unsigned N, double *phase_out)
Compute the one-sided phase spectrum of a real-valued signal.
double MD_f0_fft(const double *signal, unsigned N, double sample_rate, double min_freq_hz, double max_freq_hz)
Estimate the fundamental frequency (F0) using FFT peak picking.
#define MD_MFCC_LOG_FLOOR
Floor before logarithm in MFCC log-mel compression.
static double md_mel_to_hz(double mel)
HTK mel scale conversion: mel -> Hz.
static void _spec_free(void)
Free spectrum analysis buffers.
void MD_stft(const double *signal, unsigned signal_len, unsigned N, unsigned hop, double *mag_out)
Compute the Short-Time Fourier Transform (STFT) magnitude matrix.
static void _stft_teardown(void)
Free the cached STFT Hanning window.
static void _spec_teardown(void)
Tear down spectrum analysis plan and buffers.
static void _mel_setup(unsigned N, double sample_rate, unsigned num_mels, double min_freq_hz, double max_freq_hz)
Build the cached mel filterbank for the requested parameters.
static void _spec_setup(unsigned N)
Ensure the spectrum FFT cache matches the requested length.
void MD_shutdown(void)
Free all internally cached FFT plans and buffers.
#define MD_F0_FFT_GLOBAL_RATIO
Candidate must also be a meaningful fraction of the frame's global peak.
void MD_magnitude_spectrum(const double *signal, unsigned N, double *mag_out)
Compute the magnitude spectrum of a real-valued signal.
static void _mel_teardown(void)
Free mel filterbank cache.
void MD_mel_filterbank(unsigned N, double sample_rate, unsigned num_mels, double min_freq_hz, double max_freq_hz, double *filterbank_out)
Build a mel-spaced triangular filterbank matrix.
unsigned MD_stft_num_frames(unsigned signal_len, unsigned N, unsigned hop)
Compute the number of complete STFT frames for the given parameters.
#define MD_F0_FFT_PROMINENCE_RATIO
F0-FFT peak must stand out from the average in-range spectrum.