48 return (*(
const char *)&i) == 0;
55 for (
size_t lo = 0, hi = n - 1; hi > lo; lo++, hi--) {
63#define SWAP(x) swap_bytes(&(x), sizeof(x))
90 memset(&sfinfo, 0,
sizeof(sfinfo));
93 SNDFILE *sf = sf_open(infile, SFM_READ, &sfinfo);
95 fprintf(stderr,
"Error opening audio file: %s\n", infile);
102 if (sfinfo.channels != 1) {
104 "Input file has %d channels. Only mono files are supported.\n"
105 "Use 'sox' to split multi-channel files.\n",
114 sf_count_t nsamps = sfinfo.frames;
118 sf_command(sf, SFC_SET_NORM_FLOAT,
nullptr, SF_TRUE);
120 sf_command(sf, SFC_SET_NORM_FLOAT,
nullptr, SF_FALSE);
123 float *tmpdata = malloc((
size_t)nsamps *
sizeof(
float));
124 if (tmpdata ==
nullptr) {
125 fprintf(stderr,
"Error allocating memory for audio data\n");
133 sf_count_t nread = sf_read_float(sf, tmpdata, nsamps);
134 if (nread != nsamps) {
135 fprintf(stderr,
"Error reading %s: expected %ld samples, got %ld\n",
136 infile, (
long)nsamps, (
long)nread);
149 *datalen = (size_t)nsamps;
150 *samprate = (unsigned)sfinfo.samplerate;
168 const float **outvecs,
172 FILE *f = fopen(outfile,
"wb");
174 fprintf(stderr,
"Error opening output file: %s\n", outfile);
180 int dict_len = snprintf(dict,
sizeof(dict),
181 "{'descr': '<f4', 'fortran_order': False, 'shape': (%zu, %zu), }",
187 size_t unpadded = prefix + (size_t)dict_len + 1;
188 size_t padded = ((unpadded + 63) / 64) * 64;
189 size_t header_len = padded - prefix;
192 char *header = malloc(header_len);
193 if (header ==
nullptr) {
194 fprintf(stderr,
"Error allocating npy header\n");
198 memcpy(header, dict, (
size_t)dict_len);
199 memset(header + dict_len,
' ', header_len - (
size_t)dict_len);
200 header[header_len - 1] =
'\n';
203 unsigned char magic[10];
205 magic[1] =
'N'; magic[2] =
'U'; magic[3] =
'M'; magic[4] =
'P'; magic[5] =
'Y';
208 uint16_t hlen = (uint16_t)header_len;
209 magic[8] = (
unsigned char)(hlen & 0xFF);
210 magic[9] = (
unsigned char)((hlen >> 8) & 0xFF);
212 fwrite(magic, 1, 10, f);
213 fwrite(header, 1, header_len, f);
217 for (
size_t i = 0; i < nvecs; i++) {
218 fwrite(outvecs[i],
sizeof(
float), veclen, f);
237 const float **outvecs,
241 FILE *f = fopen(outfile,
"wb");
243 fprintf(stderr,
"Error opening output file: %s\n", outfile);
247 size_t nbytes = nvecs * veclen *
sizeof(float);
251 int json_len = snprintf(json,
sizeof(json),
252 "{\"features\":{\"dtype\":\"F32\",\"shape\":[%zu,%zu],\"data_offsets\":[0,%zu]}}",
253 nvecs, veclen, nbytes);
256 uint64_t hsize = (uint64_t)json_len;
257 unsigned char hsize_bytes[8];
258 for (
int i = 0; i < 8; i++) {
259 hsize_bytes[i] = (
unsigned char)((hsize >> (i * 8)) & 0xFF);
261 fwrite(hsize_bytes, 1, 8, f);
264 fwrite(json, 1, (
size_t)json_len, f);
267 for (
size_t i = 0; i < nvecs; i++) {
268 fwrite(outvecs[i],
sizeof(
float), veclen, f);
288 memset(&sfinfo, 0,
sizeof(sfinfo));
289 sfinfo.samplerate = (int)samprate;
291 sfinfo.format = SF_FORMAT_WAV | SF_FORMAT_FLOAT;
293 SNDFILE *sf = sf_open(outfile, SFM_WRITE, &sfinfo);
295 fprintf(stderr,
"Error opening output WAV file: %s\n", outfile);
299 sf_writef_float(sf, data, (sf_count_t)datalen);
324 const float **outvecs,
327 unsigned vecsamprate)
331 hdr.nvecs = (uint32_t)nvecs;
332 hdr.sampperiod = (uint32_t)(1.0 / (
float)vecsamprate * 1e7);
333 hdr.vecsize = (uint16_t)(veclen *
sizeof(
float));
336 FILE *f = fopen(outfile,
"wb");
338 fprintf(stderr,
"Error opening output file: %s\n", outfile);
345 SWAP(hdr.sampperiod);
354 for (
size_t i = 0; i < nvecs; i++) {
357 for (
size_t j = 0; j < veclen; j++) {
358 float tmp = outvecs[i][j];
360 fwrite(&tmp,
sizeof(
float), 1, f);
363 fwrite(outvecs[i],
sizeof(
float), veclen, f);
int FIO_write_wav(const char *outfile, const float *data, size_t datalen, unsigned samprate)
Write mono float audio to a WAV file.
int FIO_write_safetensors(const char *outfile, const float **outvecs, size_t nvecs, size_t veclen)
Write a 2D float32 array in safetensors format.
static int is_bigendian(void)
Check if the host is big-endian.
static void swap_bytes(void *pv, size_t n)
Reverse the bytes of a value in-place (e.g. to convert between little-endian and big-endian byte order).
int FIO_read_audio(const char *infile, float **indata, size_t *datalen, unsigned *samprate, unsigned donorm)
Read a single-channel audio file into a float array.
int FIO_write_npy(const char *outfile, const float **outvecs, size_t nvecs, size_t veclen)
Write a 2D float32 array in NumPy .npy v1.0 format.
int FIO_write_htk_feats(const char *outfile, const float **outvecs, size_t nvecs, size_t veclen, unsigned vecsamprate)
Write feature vectors in HTK binary file format.
#define SWAP(x)
Convenience macro: swap the bytes of variable x.
Audio file I/O and feature vector writing.