|
miniDSP
A small C library for audio DSP
|
Core DSP routines: signal measurements, scaling, and GCC-PHAT. More...
#include "minidsp.h"
Go to the source code of this file.
Functions | |
| static void | _xcorr_free (void) |
| Free all dynamically allocated FFT buffers. | |
| static void | _xcorr_malloc (void) |
| Allocate all buffers needed for cross-correlation of length _N. | |
| static void | _xcorr_teardown (void) |
| Destroy existing FFTW plans and free all buffers. | |
| static void | _xcorr_setup (void) |
| Allocate buffers and create FFTW plans for signals of length _N. | |
| static void | _gcc_setup (unsigned N) |
| Ensure the cached FFT setup matches the requested signal length. | |
| static void | _spec_free (void) |
| Free spectrum analysis buffers. | |
| static void | _spec_teardown (void) |
| Tear down spectrum analysis plan and buffers. | |
| static void | _spec_setup (unsigned N) |
| Ensure the spectrum FFT cache matches the requested length. | |
| static void | _stft_teardown (void) |
| Free the cached STFT Hanning window. | |
| static void | _stft_setup (unsigned N) |
| Ensure the shared r2c plan is ready for length N, and that the STFT Hanning window buffer matches N. | |
| static void | _fftshift (const double *in, double *out, unsigned N) |
| Shift an FFT output so that the zero-lag is in the middle. | |
| static void | _max_index (const double *a, unsigned N, double *max, unsigned *maxi) |
| Find the index and value of the maximum element in an array. | |
| void | MD_shutdown (void) |
| Free all internally cached FFT plans and buffers. | |
| double | MD_dot (const double *a, const double *b, unsigned N) |
| Dot product of two vectors a and b, each of length N. | |
| double | MD_energy (const double *a, unsigned N) |
| Signal energy: the sum of squared samples. | |
| double | MD_power (const double *a, unsigned N) |
| Signal power: energy divided by the number of samples. | |
| double | MD_power_db (const double *a, unsigned N) |
| Signal power expressed in decibels (dB). | |
| double | MD_scale (double in, double oldmin, double oldmax, double newmin, double newmax) |
| Linearly map a value from one range to another. | |
| void | MD_scale_vec (double *in, double *out, unsigned N, double oldmin, double oldmax, double newmin, double newmax) |
| Apply MD_scale() to every element of a vector. | |
| void | MD_fit_within_range (double *in, double *out, unsigned N, double newmin, double newmax) |
| Squeeze values into [newmin, newmax] only if they don't already fit. | |
| void | MD_adjust_dblevel (const double *in, double *out, unsigned N, double dblevel) |
| Automatic Gain Control (AGC): adjust a signal to a target dB level. | |
| double | MD_entropy (const double *a, unsigned N, bool clip) |
| Compute the normalised entropy of a distribution. | |
| void | MD_Gen_Hann_Win (double *out, unsigned n) |
| Generate a Hanning (raised cosine) window. | |
| void | MD_sine_wave (double *output, unsigned N, double amplitude, double freq, double sample_rate) |
| Generate a sine wave. | |
| void | MD_white_noise (double *output, unsigned N, double amplitude, unsigned seed) |
| Generate Gaussian white noise. | |
| void | MD_impulse (double *output, unsigned N, double amplitude, unsigned position) |
| Generate a discrete impulse (Kronecker delta). | |
| void | MD_chirp_linear (double *output, unsigned N, double amplitude, double f_start, double f_end, double sample_rate) |
| Generate a linear chirp (swept sine with linearly increasing frequency). | |
| void | MD_chirp_log (double *output, unsigned N, double amplitude, double f_start, double f_end, double sample_rate) |
| Generate a logarithmic chirp (swept sine with exponentially increasing frequency). | |
| void | MD_square_wave (double *output, unsigned N, double amplitude, double freq, double sample_rate) |
| Generate a square wave. | |
| void | MD_sawtooth_wave (double *output, unsigned N, double amplitude, double freq, double sample_rate) |
| Generate a sawtooth wave. | |
| void | MD_magnitude_spectrum (const double *signal, unsigned N, double *mag_out) |
| Compute the magnitude spectrum of a real-valued signal. | |
| void | MD_power_spectral_density (const double *signal, unsigned N, double *psd_out) |
| Compute the power spectral density (PSD) of a real-valued signal. | |
| void | MD_phase_spectrum (const double *signal, unsigned N, double *phase_out) |
| Compute the one-sided phase spectrum of a real-valued signal. | |
| unsigned | MD_stft_num_frames (unsigned signal_len, unsigned N, unsigned hop) |
| Compute the number of complete STFT frames for the given parameters. | |
| void | MD_stft (const double *signal, unsigned signal_len, unsigned N, unsigned hop, double *mag_out) |
| Compute the Short-Time Fourier Transform (STFT) magnitude matrix. | |
| void | MD_get_multiple_delays (const double **sigs, unsigned M, unsigned N, unsigned margin, int weightfunc, int *outdelays) |
| Estimate delays between a reference signal and several other signals. | |
| int | MD_get_delay (const double *siga, const double *sigb, unsigned N, double *ent, unsigned margin, int weightfunc) |
| Estimate the delay (in samples) between two signals. | |
| void | MD_gcc (const double *siga, const double *sigb, unsigned N, double *lagvals, int weightfunc) |
| Compute the Generalized Cross-Correlation between two signals. | |
Variables | |
| static unsigned | _N = 0 |
| static double * | siga_loc = nullptr |
| static double * | sigb_loc = nullptr |
| static double * | lags_loc = nullptr |
| static fftw_complex * | ffta = nullptr |
| static fftw_complex * | fftb = nullptr |
| static fftw_complex * | xspec = nullptr |
| static double * | xcorr = nullptr |
| static fftw_plan | pa = nullptr |
| static fftw_plan | pb = nullptr |
| static fftw_plan | px = nullptr |
| static unsigned | _spec_N = 0 |
| static double * | _spec_in = nullptr |
| static fftw_complex * | _spec_out = nullptr |
| static fftw_plan | _spec_plan = nullptr |
| static double * | _stft_win = nullptr |
| static unsigned | _stft_win_N = 0 |
Core DSP routines: signal measurements, scaling, and GCC-PHAT.
This file implements the core digital signal processing functions declared in minidsp.h. The most interesting algorithm here is GCC-PHAT (Generalized Cross-Correlation with Phase Transform), which figures out the time delay between two microphone signals.
How GCC-PHAT works (the big picture):
Imagine you clap your hands in a room with two microphones. Microphone A hears the clap slightly before microphone B because it is closer to you. GCC-PHAT measures that tiny time difference by looking at how the two signals line up in the frequency domain.
Mathematically:
Definition in file minidsp.c.
|
static |
Shift an FFT output so that the zero-lag is in the middle.
After an inverse FFT, the zero-lag (time=0) value is at index 0, with positive lags at the start and negative lags at the end. This function rearranges the array so that the zero-lag ends up at index ceil(N/2), which is more intuitive for reading off delays.
Before: [0, +1, +2, ..., +N/2, -N/2+1, ..., -2, -1] After: [-N/2+1, ..., -1, 0, +1, ..., +N/2]
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Free the cached STFT Hanning window.
Called only from MD_shutdown().
|
static |
|
static |
|
static |
Allocate buffers and create FFTW plans for signals of length _N.
FFTW plans describe how to compute an FFT of a given size. Creating a plan is slow (FFTW tries different strategies), but executing one is very fast. That is why we cache them.
We use real-to-complex (r2c) for forward transforms because our input signals are real-valued, and complex-to-real (c2r) for the inverse transform.
|
static |
| void MD_adjust_dblevel | ( | const double * | in, |
| double * | out, | ||
| unsigned | N, | ||
| double | dblevel | ||
| ) |
Automatic Gain Control (AGC): adjust a signal to a target dB level.
Automatic Gain Control: scale a signal so its power matches the requested dB level, then clip to [-1, 1].
The input signal is assumed to be in the range [-1.0, 1.0]. The function computes a gain factor so that the output signal has the desired power level (in dB), then clips to [-1.0, 1.0] if any sample exceeds that range.
Based on Jaydeep Dhole's AGC implementation for MATLAB.
| void MD_chirp_linear | ( | double * | output, |
| unsigned | N, | ||
| double | amplitude, | ||
| double | f_start, | ||
| double | f_end, | ||
| double | sample_rate | ||
| ) |
Generate a linear chirp (swept sine with linearly increasing frequency).
The instantaneous frequency sweeps linearly from f_start to f_end over N samples:
f(t) = f_start + (f_end - f_start) * t / T
where T = (N-1) / sample_rate is the sweep duration. The output is:
output[i] = amplitude * sin(2*pi * (f_start*t + 0.5*chirp_rate*t^2))
A linear chirp is the standard test signal for spectrograms – its instantaneous frequency traces a straight diagonal line in the time-frequency plane.
| output | Output buffer of length N (caller-allocated). |
| N | Number of samples to generate. Must be > 0. |
| amplitude | Peak amplitude of the chirp (e.g. 1.0). |
| f_start | Starting frequency in Hz. |
| f_end | Ending frequency in Hz. |
| sample_rate | Sampling rate in Hz. Must be > 0. |
| void MD_chirp_log | ( | double * | output, |
| unsigned | N, | ||
| double | amplitude, | ||
| double | f_start, | ||
| double | f_end, | ||
| double | sample_rate | ||
| ) |
Generate a logarithmic chirp (swept sine with exponentially increasing frequency).
The instantaneous frequency sweeps exponentially from f_start to f_end over N samples:
f(t) = f_start * (f_end / f_start)^(t / T)
where T = (N-1) / sample_rate is the sweep duration. The output is:
output[i] = amplitude * sin(2*pi * f_start * T * (r^(t/T) - 1) / ln(r))
where r = f_end / f_start.
A logarithmic chirp spends equal time per octave, making it ideal for measuring systems whose behaviour is best described on a log-frequency axis (e.g. audio equaliser response).
| output | Output buffer of length N (caller-allocated). |
| N | Number of samples to generate. Must be > 0. |
| amplitude | Peak amplitude of the chirp (e.g. 1.0). |
| f_start | Starting frequency in Hz. Must be > 0. |
| f_end | Ending frequency in Hz. Must be > 0 and != f_start. |
| sample_rate | Sampling rate in Hz. Must be > 0. |
| double MD_dot | ( | const double * | a, |
| const double * | b, | ||
| unsigned | N | ||
| ) |
Dot product of two vectors a and b, each of length N.
Compute the dot product of two vectors.
dot = a[0]*b[0] + a[1]*b[1] + ... + a[N-1]*b[N-1]
This is one of the most fundamental operations in DSP. The dot product measures how "similar" two signals are – if they are identical the result is large; if they are unrelated it is near zero.
| double MD_energy | ( | const double * | a, |
| unsigned | N | ||
| ) |
Signal energy: the sum of squared samples.
Compute signal energy: sum of squared samples.
N-1
E = SUM a[n]^2 n=0
Energy tells you "how loud" a signal is overall. A silent signal has zero energy; a loud one has high energy. Notice that squaring makes all values positive, so negative samples contribute too.
| double MD_entropy | ( | const double * | a, |
| unsigned | N, | ||
| bool | clip | ||
| ) |
Compute the normalised entropy of a distribution.
Compute the normalized entropy of a distribution.
Entropy measures how "spread out" a distribution is:
The Shannon entropy formula is: H = -SUM( p_i * log2(p_i) ) We normalise by dividing by log2(N) so the result is always in [0, 1].
| a | Array of values representing the distribution. |
| N | Length of the array. |
| clip | If true, ignore negative values (treat them as 0). If false, use a[i]^2 so all values become non-negative. |
| void MD_fit_within_range | ( | double * | in, |
| double * | out, | ||
| unsigned | N, | ||
| double | newmin, | ||
| double | newmax | ||
| ) |
Squeeze values into [newmin, newmax] only if they don't already fit.
Fit values within [newmin, newmax].
If the input range already lies inside [newmin, newmax], the values are copied as-is (no stretching). Otherwise, the full input range is mapped onto the new range.
| void MD_gcc | ( | const double * | siga, |
| const double * | sigb, | ||
| unsigned | N, | ||
| double * | lagvals, | ||
| int | weightfunc | ||
| ) |
Compute the Generalized Cross-Correlation between two signals.
Compute the full generalized cross-correlation between two signals.
This is the core GCC-PHAT routine. It fills lagvals[] with the cross-correlation values, shifted so that the zero-lag value is at index ceil(N/2).
| siga | First signal. |
| sigb | Second signal. |
| N | Length of both signals. |
| lagvals | Output array of N doubles (pre-allocated by caller). |
| weightfunc | SIMP or PHAT. |
| void MD_Gen_Hann_Win | ( | double * | out, |
| unsigned | n | ||
| ) |
Generate a Hanning (raised cosine) window.
Generate a Hanning window of length n.
The formula is: w[i] = 0.5 * (1 - cos(2*pi*i / (n-1)))
Windowing is essential before taking an FFT because real-world signals don't start and end at exactly zero. Without a window, the abrupt edges create false high-frequency content ("spectral leakage"). The Hanning window smoothly tapers the signal to zero at both ends, reducing this artefact.
| int MD_get_delay | ( | const double * | siga, |
| const double * | sigb, | ||
| unsigned | N, | ||
| double * | ent, | ||
| unsigned | margin, | ||
| int | weightfunc | ||
| ) |
Estimate the delay (in samples) between two signals.
Estimate the delay between two signals.
The function computes the GCC between the two signals, then searches within a +/- margin window around zero-lag for the peak. The offset of that peak from zero is the estimated delay.
| siga | First signal (reference). |
| sigb | Second signal. |
| N | Length of both signals. |
| ent | If non-null, receives the normalised entropy of the lag values in the search window. High entropy (~1.0) means a flat, unreliable correlation; low entropy (~0.0) means a sharp, trustworthy peak. |
| margin | How far (in samples) to search around zero-lag. |
| weightfunc | SIMP or PHAT. |
| void MD_get_multiple_delays | ( | const double ** | sigs, |
| unsigned | M, | ||
| unsigned | N, | ||
| unsigned | margin, | ||
| int | weightfunc, | ||
| int * | outdelays | ||
| ) |
Estimate delays between a reference signal and several other signals.
Estimate delays between a reference signal and M-1 other signals.
This is a convenience wrapper that calls MD_get_delay() for each non-reference signal. A Hanning window is applied to all signals before computing the cross-correlation.
| sigs | Array of M pointers to signals. sigs[0] is the reference. |
| M | Number of signals (must be >= 2). |
| N | Length of every signal. |
| margin | Search +/- margin samples around zero-lag. |
| weightfunc | SIMP or PHAT. |
| outdelays | Output: M-1 delay values (pre-allocated by caller). |
| void MD_impulse | ( | double * | output, |
| unsigned | N, | ||
| double | amplitude, | ||
| unsigned | position | ||
| ) |
Generate a discrete impulse (Kronecker delta).
Fills the output buffer with zeros except at position, where the value is set to amplitude. A unit impulse (amplitude 1.0 at position 0) is the identity element of convolution and has a perfectly flat magnitude spectrum.
Common uses:
| output | Output buffer of length N (caller-allocated). |
| N | Number of samples to generate. Must be > 0. |
| amplitude | Value of the impulse spike (e.g. 1.0 for unit impulse). |
| position | Sample index of the spike (0-based). Must be < N. |
| void MD_magnitude_spectrum | ( | const double * | signal, |
| unsigned | N, | ||
| double * | mag_out | ||
| ) |
Compute the magnitude spectrum of a real-valued signal.
This performs a real-to-complex FFT using FFTW, then computes the absolute value (magnitude) of each complex frequency bin.
For a real signal of length N, the FFT is conjugate-symmetric, so only the first N/2 + 1 bins are unique:
The magnitude is computed as: |X(k)| = sqrt( Re(X(k))^2 + Im(X(k))^2 )
The FFT plan is cached and reused across calls of the same length, following the same pattern as the GCC functions.
| signal | Input signal of length N. |
| N | Number of samples (must be >= 2). |
| mag_out | Output: magnitudes for bins 0..N/2. Must be pre-allocated to N/2 + 1 doubles. |
| void MD_phase_spectrum | ( | const double * | signal, |
| unsigned | N, | ||
| double * | phase_out | ||
| ) |
Compute the one-sided phase spectrum of a real-valued signal.
Compute the one-sided phase spectrum of a real signal.
The phase is the argument (angle) of each complex DFT coefficient: phi(k) = atan2(Im(X(k)), Re(X(k)))
This reuses the same FFT cache as MD_magnitude_spectrum() and MD_power_spectral_density() – no additional plan allocation.
Phase is scale-invariant (multiplying a signal by a positive constant does not change its phase), so no normalisation by N is needed.
| signal | Input signal of length N. |
| N | Number of samples (must be >= 2). |
| phase_out | Output: phase in radians for bins 0..N/2. Must be pre-allocated to N/2 + 1 doubles. |
| double MD_power | ( | const double * | a, |
| unsigned | N | ||
| ) |
| double MD_power_db | ( | const double * | a, |
| unsigned | N | ||
| ) |
Signal power expressed in decibels (dB).
Compute signal power in decibels: 10 * log10(power).
P_dB = 10 * log10(P)
Decibels are a logarithmic scale commonly used in audio engineering. Every +10 dB means the power is 10x larger. A floor of 1e-10 is used to avoid log(0), which would be negative infinity.
| void MD_power_spectral_density | ( | const double * | signal, |
| unsigned | N, | ||
| double * | psd_out | ||
| ) |
Compute the power spectral density (PSD) of a real-valued signal.
The PSD is the "periodogram" estimator: PSD[k] = |X(k)|^2 / N. It reuses the same FFT cache as MD_magnitude_spectrum() – both perform the same real-to-complex FFT, only the post-processing differs.
We compute |X(k)|^2 = re^2 + im^2 directly from the real and imaginary parts, rather than calling cabs() (which computes sqrt(re^2 + im^2)) and then squaring. This avoids a redundant sqrt and is both faster and more numerically precise.
| signal | Input signal of length N. |
| N | Number of samples (must be >= 2). |
| psd_out | Output: PSD for bins 0..N/2. Must be pre-allocated to N/2 + 1 doubles. |
| void MD_sawtooth_wave | ( | double * | output, |
| unsigned | N, | ||
| double | amplitude, | ||
| double | freq, | ||
| double | sample_rate | ||
| ) |
Generate a sawtooth wave.
Fills the output buffer with a sawtooth wave that ramps linearly from −amplitude to +amplitude over each period:
\[ x[n] = A \left(\frac{\phi}{\pi} - 1\right) \]
where \(\phi = 2\pi f n / f_s \pmod{2\pi}\).
A sawtooth wave contains all integer harmonics (1f, 2f, 3f, …) whose amplitudes decay as 1/k — useful for demonstrating richer harmonic content compared to the square wave's odd-only series.
| output | Output buffer of length N (caller-allocated). |
| N | Number of samples to generate. Must be > 0. |
| amplitude | Peak amplitude of the sawtooth wave (e.g. 1.0). |
| freq | Frequency in Hz. |
| sample_rate | Sampling rate in Hz. Must be > 0. |
| double MD_scale | ( | double | in, |
| double | oldmin, | ||
| double | oldmax, | ||
| double | newmin, | ||
| double | newmax | ||
| ) |
Linearly map a value from one range to another.
Map a single value from one range to another.
Formula: out = (in - oldmin) * (newmax - newmin) / (oldmax - oldmin) + newmin
For example, mapping a value of 5 from the range [0, 10] into [0, 100] gives 50. This is sometimes called "lerp" (linear interpolation).
| void MD_scale_vec | ( | double * | in, |
| double * | out, | ||
| unsigned | N, | ||
| double | oldmin, | ||
| double | oldmax, | ||
| double | newmin, | ||
| double | newmax | ||
| ) |
Apply MD_scale() to every element of a vector.
Map every element of a vector from one range to another.
| void MD_shutdown | ( | void | ) |
| void MD_sine_wave | ( | double * | output, |
| unsigned | N, | ||
| double | amplitude, | ||
| double | freq, | ||
| double | sample_rate | ||
| ) |
Generate a sine wave.
Fills output[i] = amplitude * sin(2π * freq * i / sample_rate) for i in [0, N).
This is the simplest test signal in DSP — a pure tone at a single frequency. Use it to verify filter responses, check FFT bin alignment, or provide a clean input for any processing chain.
| output | Output buffer of length N (caller-allocated). |
| N | Number of samples to generate. Must be > 0. |
| amplitude | Peak amplitude of the sine wave (e.g. 1.0). |
| freq | Frequency in Hz. |
| sample_rate | Sampling rate in Hz. Must be > 0. |
| void MD_square_wave | ( | double * | output, |
| unsigned | N, | ||
| double | amplitude, | ||
| double | freq, | ||
| double | sample_rate | ||
| ) |
Generate a square wave.
Fills the output buffer with a bipolar square wave that alternates between +amplitude and −amplitude at the given frequency:
\[ x[n] = \begin{cases} +A & 0 < \phi < \pi \\ -A & \pi < \phi < 2\pi \\ 0 & \phi = 0 \text{ or } \phi = \pi \end{cases} \]
where \(\phi = 2\pi f n / f_s \pmod{2\pi}\).
A square wave contains only odd harmonics (1f, 3f, 5f, …) whose amplitudes decay as 1/k — a classic demonstration of the Fourier series and the Gibbs phenomenon.
| output | Output buffer of length N (caller-allocated). |
| N | Number of samples to generate. Must be > 0. |
| amplitude | Peak amplitude of the square wave (e.g. 1.0). |
| freq | Frequency in Hz. |
| sample_rate | Sampling rate in Hz. Must be > 0. |
| void MD_stft | ( | const double * | signal, |
| unsigned | signal_len, | ||
| unsigned | N, | ||
| unsigned | hop, | ||
| double * | mag_out | ||
| ) |
Compute the Short-Time Fourier Transform (STFT) magnitude matrix.
Compute the Short-Time Fourier Transform (STFT) of a real-valued signal.
Slides a Hanning-windowed r2c FFT over the signal in steps of hop samples. For each frame, the window is applied by multiplying directly into the shared _spec_in buffer (no separate memcpy pass), then FFTW executes the plan and magnitudes |X(k)| are written to the output row.
The shared spec* plan is reused so that interleaving calls to MD_stft(), MD_magnitude_spectrum(), and MD_power_spectral_density() with the same N incurs no extra plan-rebuild overhead.
| signal | Input signal. |
| signal_len | Number of samples (0 frames if < N, see header). |
| N | FFT window size (>= 2). |
| hop | Hop size (>= 1). |
| mag_out | Row-major output: mag_out[f*(N/2+1) + k] = |X_f(k)|. Must be pre-allocated by caller. |
| unsigned MD_stft_num_frames | ( | unsigned | signal_len, |
| unsigned | N, | ||
| unsigned | hop | ||
| ) |
Compute the number of complete STFT frames for the given parameters.
Compute the number of STFT frames for the given signal length and parameters.
Returns (signal_len - N) / hop + 1 when signal_len >= N, else 0. Integer division truncates, so only complete frames are counted.
| void MD_white_noise | ( | double * | output, |
| unsigned | N, | ||
| double | amplitude, | ||
| unsigned | seed | ||
| ) |
Generate Gaussian white noise.
Fills output with normally distributed random samples (mean 0, standard deviation amplitude) using the Box-Muller transform. White noise has equal energy at all frequencies – its power spectral density is approximately flat.
Use white noise to test filters, measure impulse responses, or as an additive noise source for SNR experiments.
| output | Output buffer of length N (caller-allocated). |
| N | Number of samples to generate. Must be > 0. |
| amplitude | Standard deviation of the noise (e.g. 1.0). |
| seed | Seed for the random number generator. Using the same seed produces the same output sequence, which is useful for reproducible tests. |