20 for (
unsigned i = 0; i < num_bins; i++)
27 for (
unsigned i = 0; i < num_bins; i++) {
28 double p = psd[i] / total;
30 entropy -= p * log(p);
33 double max_entropy = log((
double)num_bins);
34 if (max_entropy <= 0.0)
37 return entropy / max_entropy;
47 double arith_sum = 0.0;
49 for (
unsigned i = 0; i < num_bins; i++) {
50 double val = psd[i] > 0.0 ? psd[i] : 1e-30;
55 double arith_mean = arith_sum / (double)num_bins;
56 if (arith_mean <= 0.0)
59 double log_geo_mean = log_sum / (double)num_bins;
60 double geo_mean = exp(log_geo_mean);
62 double flatness = geo_mean / arith_mean;
63 if (flatness > 1.0) flatness = 1.0;
64 if (flatness < 0.0) flatness = 0.0;
74 double sample_rate,
unsigned N,
75 double band_low_hz,
double band_high_hz)
77 double freq_per_bin = sample_rate / (double)N;
81 for (
unsigned i = 0; i < num_bins; i++) {
82 double freq = i * freq_per_bin;
84 if (freq >= band_low_hz && freq <= band_high_hz)
98#define RANGE_FLOOR 1e-12
100static void update_normalization(
MD_vad_state *state,
const double *raw)
111 + alpha * (raw[i] - state->
feat_min[i]);
114 + alpha * (raw[i] - state->
feat_max[i]);
119static void normalize_features(
const MD_vad_state *state,
120 const double *raw,
double *norm_out)
124 if (range < RANGE_FLOOR)
127 double val = (raw[i] - state->
feat_min[i]) / range;
128 if (val < 0.0) val = 0.0;
129 if (val > 1.0) val = 1.0;
138static void extract_features(
const double *signal,
unsigned N,
140 double band_low_hz,
double band_high_hz,
146 unsigned num_bins = N / 2 + 1;
147 double psd[num_bins];
159 band_low_hz, band_high_hz);
191 if (params != NULL) {
209 unsigned N,
double sample_rate)
216 extract_features(signal, N, sample_rate,
220 update_normalization(state, raw);
225 unsigned N,
double sample_rate,
226 double *score_out,
double *features_out)
234 extract_features(signal, N, sample_rate,
239 update_normalization(state, raw);
243 normalize_features(state, raw, norm);
276 if (score_out != NULL)
278 if (features_out != NULL) {
280 features_out[i] = norm[i];
A mini library of DSP (Digital Signal Processing) routines.
#define MD_VAD_NUM_FEATURES
Total number of features.
void MD_power_spectral_density(const double *signal, unsigned N, double *psd_out)
Compute the power spectral density (PSD) of a real-valued signal.
@ MD_ERR_INVALID_SIZE
A size or count argument is invalid.
@ MD_ERR_NULL_POINTER
A required pointer argument is NULL.
double MD_zero_crossing_rate(const double *a, unsigned N)
Compute the zero-crossing rate of a signal.
#define MD_VAD_FEAT_ENERGY
Frame energy.
#define MD_VAD_FEAT_ZCR
Zero-crossing rate.
#define MD_VAD_FEAT_SPECTRAL_ENTROPY
Spectral entropy.
double MD_energy(const double *a, unsigned N)
Compute signal energy: sum of squared samples.
#define MD_VAD_FEAT_BAND_ENERGY_RATIO
Band energy ratio.
#define MD_VAD_FEAT_SPECTRAL_FLATNESS
Spectral flatness.
Internal header for cross-file dependencies within the minidsp module.
#define MD_CHECK(cond, code, msg, retval)
Check a precondition in a function that returns a value.
#define MD_CHECK_VOID(cond, code, msg)
Check a precondition in a void function.
static double compute_spectral_entropy(const double *psd, unsigned num_bins)
Spectral entropy: normalize PSD to a probability distribution, return -sum(p * log(p)) / log(num_bins).
void MD_vad_default_params(MD_vad_params *params)
Populate a VAD params struct with optimized defaults.
static double compute_band_energy_ratio(const double *psd, unsigned num_bins, double sample_rate, unsigned N, double band_low_hz, double band_high_hz)
Band energy ratio: sum of PSD bins in [band_low_hz, band_high_hz] divided by total PSD sum.
int MD_vad_process_frame(MD_vad_state *state, const double *signal, unsigned N, double sample_rate, double *score_out, double *features_out)
Process one audio frame and return a binary speech decision.
static double compute_spectral_flatness(const double *psd, unsigned num_bins)
Spectral flatness: geometric mean / arithmetic mean of PSD bins.
void MD_vad_calibrate(MD_vad_state *state, const double *signal, unsigned N, double sample_rate)
Feed a known-silence frame to seed the adaptive normalization.
void MD_vad_init(MD_vad_state *state, const MD_vad_params *params)
Initialize VAD state from params.
Parameters for the VAD detector.
double threshold
Decision threshold (0.0–1.0).
double weights[MD_VAD_NUM_FEATURES]
Per-feature weights for scoring.
double band_high_hz
Upper bound of speech band (Hz).
unsigned onset_frames
Consecutive above-threshold frames required before speech is declared.
double band_low_hz
Lower bound of speech band (Hz).
double adaptation_rate
EMA rate for min/max tracking (0.0–1.0).
unsigned hangover_frames
Extra speech frames after score drops.
Internal state for the VAD detector.
unsigned onset_counter
Consecutive above-threshold count.
double feat_max[MD_VAD_NUM_FEATURES]
EMA-tracked feature maximums.
MD_vad_params params
Copy of caller params.
int current_decision
Current speech decision (0 or 1).
double feat_min[MD_VAD_NUM_FEATURES]
EMA-tracked feature minimums.
unsigned hangover_counter
Remaining hangover frames.
unsigned frames_processed
Total frames seen.