miniDSP
A small C library for audio DSP
Loading...
Searching...
No Matches
fileio.c
Go to the documentation of this file.
1
9#include "fileio.h"
10
11/* -----------------------------------------------------------------------
12 * HTK file format support
13 *
14 * HTK (Hidden Markov Model Toolkit) is a classic speech recognition
15 * framework. Its binary feature file format starts with a 12-byte
16 * header, followed by the feature data. All multi-byte values are
17 * stored in big-endian byte order.
18 * -----------------------------------------------------------------------*/
19
/*
 * On-disk HTK header. The HTK writer in this file fills one of these,
 * byte-swaps each field to big-endian on little-endian hosts, and writes
 * the struct to the file in a single fwrite().
 */
typedef struct FIO_HTKheader {
    uint32_t nvecs;      /* number of feature vectors that follow */
    uint32_t sampperiod; /* vector period, in units of 100 ns */
    uint16_t vecsize;    /* size of one feature vector, in bytes */
    uint16_t parmkind;   /* HTK parameter-kind code */
} FIO_HTKheader;

/* The struct is written verbatim, so any padding would corrupt the file. */
_Static_assert(sizeof(FIO_HTKheader) == 12,
               "FIO_HTKheader must be exactly 12 bytes with no padding");
35
36/* -----------------------------------------------------------------------
37 * Byte-order helpers
38 *
39 * HTK files are always big-endian. Most modern PCs are little-endian,
40 * so we need to swap bytes before writing. These helpers detect the
41 * host byte order at runtime and swap if necessary.
42 * -----------------------------------------------------------------------*/
43
/*
 * Report the host byte order, detected at run time.
 * Returns 1 when the first byte of an int holding 1 is zero (big-endian),
 * and 0 otherwise.
 */
static int is_bigendian(void)
{
    const int probe = 1;
    const char *first_byte = (const char *)&probe;

    /* On a little-endian host the low-order byte (value 1) is stored first. */
    return first_byte[0] == 0;
}
50
/*
 * Reverse the order of the n bytes starting at pv, in place.
 *
 * pv - start of the object to reverse.
 * n  - object size in bytes; 0 and 1 are accepted and leave memory untouched.
 */
static void swap_bytes(void *pv, size_t n)
{
    /* Without this guard, n == 0 would make `n - 1` wrap to SIZE_MAX and
     * the loop below would access memory far outside the object. */
    if (n < 2) {
        return;
    }

    unsigned char *bytes = pv;
    for (size_t lo = 0, hi = n - 1; lo < hi; lo++, hi--) {
        const unsigned char saved = bytes[lo];
        bytes[lo] = bytes[hi];
        bytes[hi] = saved;
    }
}
61
/* Reverse the bytes of the lvalue `var` in place; the byte count is taken
 * from the operand's own type via sizeof. */
#define SWAP(var) swap_bytes(&(var), sizeof(var))
64
65/* -----------------------------------------------------------------------
66 * Public API
67 * -----------------------------------------------------------------------*/
68
83int FIO_read_audio(const char *infile,
84 float **indata,
85 size_t *datalen,
86 unsigned *samprate,
87 unsigned donorm)
88{
89 SF_INFO sfinfo;
90 memset(&sfinfo, 0, sizeof(sfinfo));
91
92 /* Open the file and read its metadata */
93 SNDFILE *sf = sf_open(infile, SFM_READ, &sfinfo);
94 if (sf == nullptr) {
95 fprintf(stderr, "Error opening audio file: %s\n", infile);
96 *indata = nullptr;
97 *datalen = 0;
98 *samprate = 0;
99 return -1;
100 }
101
102 if (sfinfo.channels != 1) {
103 fprintf(stderr,
104 "Input file has %d channels. Only mono files are supported.\n"
105 "Use 'sox' to split multi-channel files.\n",
106 sfinfo.channels);
107 sf_close(sf);
108 *indata = nullptr;
109 *datalen = 0;
110 *samprate = 0;
111 return -1;
112 }
113
114 sf_count_t nsamps = sfinfo.frames;
115
116 /* Tell libsndfile whether to normalise float output to [-1, 1] */
117 if (donorm == 1)
118 sf_command(sf, SFC_SET_NORM_FLOAT, nullptr, SF_TRUE);
119 else
120 sf_command(sf, SFC_SET_NORM_FLOAT, nullptr, SF_FALSE);
121
122 /* Allocate and read */
123 float *tmpdata = malloc((size_t)nsamps * sizeof(float));
124 if (tmpdata == nullptr) {
125 fprintf(stderr, "Error allocating memory for audio data\n");
126 sf_close(sf);
127 *indata = nullptr;
128 *datalen = 0;
129 *samprate = 0;
130 return -1;
131 }
132
133 sf_count_t nread = sf_read_float(sf, tmpdata, nsamps);
134 if (nread != nsamps) {
135 fprintf(stderr, "Error reading %s: expected %ld samples, got %ld\n",
136 infile, (long)nsamps, (long)nread);
137 free(tmpdata);
138 sf_close(sf);
139 *indata = nullptr;
140 *datalen = 0;
141 *samprate = 0;
142 return -1;
143 }
144
145 sf_close(sf);
146
147 /* Set output parameters */
148 *indata = tmpdata;
149 *datalen = (size_t)nsamps;
150 *samprate = (unsigned)sfinfo.samplerate;
151 return 0;
152}
153
154/* -----------------------------------------------------------------------
155 * NumPy .npy v1.0 writer
156 *
157 * The .npy format is NumPy's native binary format. It consists of:
158 * - 6-byte magic: \x93NUMPY
159 * - 2-byte version: 1.0
160 * - 2-byte LE u16: header string length
161 * - ASCII header: Python dict with dtype, order, shape
162 * - Raw data (little-endian)
163 *
164 * The total prefix (10 bytes + header string) must be divisible by 64.
165 * -----------------------------------------------------------------------*/
166
/*
 * Write nvecs rows of veclen floats as a 2-D NumPy .npy v1.0 array.
 *
 * outfile - path of the file to create (opened with "wb").
 * outvecs - array of nvecs row pointers, each row holding veclen floats.
 * nvecs   - number of rows; written as the first shape dimension.
 * veclen  - floats per row; written as the second shape dimension.
 *
 * Returns 0 when the whole file was written and closed successfully.
 * Returns -1 and prints a message to stderr if the header cannot be
 * formatted, the file cannot be opened, or any write or the final close
 * fails.
 *
 * The rows are written exactly as they sit in memory, so the '<f4'
 * descriptor in the header is only accurate on a little-endian host.
 */
int FIO_write_npy(const char *outfile,
                  const float **outvecs,
                  size_t nvecs,
                  size_t veclen)
{
    enum { NPY_PREFIX_LEN = 10, NPY_ALIGN = 64 };

    /* Build the ASCII header dict (without padding) */
    char dict[256];
    const int dict_len = snprintf(dict, sizeof(dict),
        "{'descr': '<f4', 'fortran_order': False, 'shape': (%zu, %zu), }",
        nvecs, veclen);
    if (dict_len < 0 || (size_t)dict_len >= sizeof(dict)) {
        fprintf(stderr, "Error formatting npy header\n");
        return -1;
    }

    /* Pad with spaces so that prefix + header string is a multiple of 64;
     * the header string ends with a newline (+1 below). */
    const size_t unpadded = NPY_PREFIX_LEN + (size_t)dict_len + 1;
    const size_t padded = (unpadded + NPY_ALIGN - 1) / NPY_ALIGN * NPY_ALIGN;
    const size_t header_len = padded - NPY_PREFIX_LEN;

    /* dict_len < sizeof(dict), so header_len can never exceed this buffer;
     * that also keeps it well inside the 16-bit length field. */
    char header[sizeof(dict) + NPY_ALIGN];
    memcpy(header, dict, (size_t)dict_len);
    memset(header + dict_len, ' ', header_len - (size_t)dict_len);
    header[header_len - 1] = '\n';

    /* Magic, version 1.0, then the header length as little-endian u16 */
    const unsigned char prefix[NPY_PREFIX_LEN] = {
        0x93, 'N', 'U', 'M', 'P', 'Y',
        1, /* major version */
        0, /* minor version */
        (unsigned char)(header_len & 0xFF),
        (unsigned char)((header_len >> 8) & 0xFF),
    };

    FILE *f = fopen(outfile, "wb");
    if (!f) {
        fprintf(stderr, "Error opening output file: %s\n", outfile);
        return -1;
    }

    int ok = fwrite(prefix, 1, sizeof(prefix), f) == sizeof(prefix)
          && fwrite(header, 1, header_len, f) == header_len;

    /* Write raw float32 data, row-major; stop at the first short write */
    for (size_t i = 0; ok && i < nvecs; i++) {
        ok = fwrite(outvecs[i], sizeof(float), veclen, f) == veclen;
    }

    /* Buffered data is flushed here, so a full disk may only show up now. */
    if (fclose(f) != 0) {
        ok = 0;
    }

    if (!ok) {
        fprintf(stderr, "Error writing output file: %s\n", outfile);
        return -1;
    }
    return 0;
}
224
225/* -----------------------------------------------------------------------
226 * Safetensors writer
227 *
228 * The safetensors format is:
229 * - 8-byte LE u64: JSON header size
230 * - JSON header: metadata describing tensors
231 * - Raw tensor data (little-endian)
232 *
233 * Data offsets in the JSON are [start, end) — exclusive end.
234 * -----------------------------------------------------------------------*/
235
/*
 * Write nvecs rows of veclen floats as one F32 tensor named "features"
 * in safetensors layout: 8-byte little-endian header size, JSON header,
 * then the raw row data.
 *
 * outfile - path of the file to create (opened with "wb").
 * outvecs - array of nvecs row pointers, each row holding veclen floats.
 * nvecs   - number of rows; first entry of the JSON "shape".
 * veclen  - floats per row; second entry of the JSON "shape".
 *
 * Returns 0 when the whole file was written and closed successfully.
 * Returns -1 and prints a message to stderr if the tensor byte count
 * overflows size_t, the header cannot be formatted, the file cannot be
 * opened, or any write or the final close fails.
 *
 * The rows are written exactly as they sit in memory (host byte order).
 */
int FIO_write_safetensors(const char *outfile,
                          const float **outvecs,
                          size_t nvecs,
                          size_t veclen)
{
    /* The end offset in the JSON is a byte count; refuse to wrap it. */
    if (veclen != 0 && nvecs > SIZE_MAX / sizeof(float) / veclen) {
        fprintf(stderr, "Error: tensor of %zu x %zu floats is too large\n",
                nvecs, veclen);
        return -1;
    }
    const size_t nbytes = nvecs * veclen * sizeof(float);

    /* Build JSON header; data_offsets is [start, end) in bytes */
    char json[512];
    const int json_len = snprintf(json, sizeof(json),
        "{\"features\":{\"dtype\":\"F32\",\"shape\":[%zu,%zu],\"data_offsets\":[0,%zu]}}",
        nvecs, veclen, nbytes);
    if (json_len < 0 || (size_t)json_len >= sizeof(json)) {
        fprintf(stderr, "Error formatting safetensors header\n");
        return -1;
    }

    /* Serialise the header size byte by byte so the result is
     * little-endian regardless of the host. */
    const uint64_t hsize = (uint64_t)json_len;
    unsigned char hsize_bytes[8];
    for (int i = 0; i < 8; i++) {
        hsize_bytes[i] = (unsigned char)((hsize >> (i * 8)) & 0xFF);
    }

    FILE *f = fopen(outfile, "wb");
    if (!f) {
        fprintf(stderr, "Error opening output file: %s\n", outfile);
        return -1;
    }

    int ok = fwrite(hsize_bytes, 1, sizeof(hsize_bytes), f) == sizeof(hsize_bytes)
          && fwrite(json, 1, (size_t)json_len, f) == (size_t)json_len;

    /* Write raw float32 data; stop at the first short write */
    for (size_t i = 0; ok && i < nvecs; i++) {
        ok = fwrite(outvecs[i], sizeof(float), veclen, f) == veclen;
    }

    /* Buffered data is flushed here, so a full disk may only show up now. */
    if (fclose(f) != 0) {
        ok = 0;
    }

    if (!ok) {
        fprintf(stderr, "Error writing output file: %s\n", outfile);
        return -1;
    }
    return 0;
}
274
275/* -----------------------------------------------------------------------
276 * WAV writer (via libsndfile)
277 *
278 * Writes mono IEEE float WAV, preserving full precision for DSP
279 * round-trips (unlike PCM_16 which quantises to 16-bit integers).
280 * -----------------------------------------------------------------------*/
281
282int FIO_write_wav(const char *outfile,
283 const float *data,
284 size_t datalen,
285 unsigned samprate)
286{
287 SF_INFO sfinfo;
288 memset(&sfinfo, 0, sizeof(sfinfo));
289 sfinfo.samplerate = (int)samprate;
290 sfinfo.channels = 1;
291 sfinfo.format = SF_FORMAT_WAV | SF_FORMAT_FLOAT;
292
293 SNDFILE *sf = sf_open(outfile, SFM_WRITE, &sfinfo);
294 if (sf == nullptr) {
295 fprintf(stderr, "Error opening output WAV file: %s\n", outfile);
296 return -1;
297 }
298
299 sf_writef_float(sf, data, (sf_count_t)datalen);
300 sf_close(sf);
301 return 0;
302}
303
304/* -----------------------------------------------------------------------
305 * HTK writer (deprecated — use FIO_write_npy or FIO_write_safetensors)
306 * -----------------------------------------------------------------------*/
307
323int FIO_write_htk_feats(const char *outfile,
324 const float **outvecs,
325 size_t nvecs,
326 size_t veclen,
327 unsigned vecsamprate)
328{
329 /* Build the 12-byte header */
330 FIO_HTKheader hdr;
331 hdr.nvecs = (uint32_t)nvecs;
332 hdr.sampperiod = (uint32_t)(1.0 / (float)vecsamprate * 1e7);
333 hdr.vecsize = (uint16_t)(veclen * sizeof(float));
334 hdr.parmkind = 9; /* 9 = USER (user-defined parameter type) */
335
336 FILE *f = fopen(outfile, "wb");
337 if (f == nullptr) {
338 fprintf(stderr, "Error opening output file: %s\n", outfile);
339 return -1;
340 }
341
342 /* HTK files are big-endian, so swap bytes on little-endian machines */
343 if (!is_bigendian()) {
344 SWAP(hdr.nvecs);
345 SWAP(hdr.sampperiod);
346 SWAP(hdr.vecsize);
347 SWAP(hdr.parmkind);
348 }
349
350 /* Write the header */
351 fwrite(&hdr, sizeof(FIO_HTKheader), 1, f);
352
353 /* Write the feature vectors */
354 for (size_t i = 0; i < nvecs; i++) {
355 if (!is_bigendian()) {
356 /* Swap each float individually */
357 for (size_t j = 0; j < veclen; j++) {
358 float tmp = outvecs[i][j];
359 SWAP(tmp);
360 fwrite(&tmp, sizeof(float), 1, f);
361 }
362 } else {
363 fwrite(outvecs[i], sizeof(float), veclen, f);
364 }
365 }
366
367 fclose(f);
368 return 0;
369}
int FIO_write_wav(const char *outfile, const float *data, size_t datalen, unsigned samprate)
Write mono float audio to a WAV file.
Definition fileio.c:282
int FIO_write_safetensors(const char *outfile, const float **outvecs, size_t nvecs, size_t veclen)
Write a 2D float32 array in safetensors format.
Definition fileio.c:236
static int is_bigendian(void)
Check if the host is big-endian.
Definition fileio.c:45
static void swap_bytes(void *pv, size_t n)
Reverse the bytes of a value in-place (e.g. to convert between little-endian and big-endian byte order).
Definition fileio.c:52
int FIO_read_audio(const char *infile, float **indata, size_t *datalen, unsigned *samprate, unsigned donorm)
Read a single-channel audio file into a float array.
Definition fileio.c:83
int FIO_write_npy(const char *outfile, const float **outvecs, size_t nvecs, size_t veclen)
Write a 2D float32 array in NumPy .npy v1.0 format.
Definition fileio.c:167
int FIO_write_htk_feats(const char *outfile, const float **outvecs, size_t nvecs, size_t veclen, unsigned vecsamprate)
Write feature vectors in HTK binary file format.
Definition fileio.c:323
#define SWAP(x)
Convenience macro: swap the bytes of variable x.
Definition fileio.c:63
Audio file I/O and feature vector writing.
HTK file header structure (12 bytes total).
Definition fileio.c:29