/*---------------------------------------------------------------------------*\
FILE........: c2sim.c
AUTHOR......: David Rowe
DATE CREATED: 20/8/2010
Codec2 simulation. Combines encoder and decoder and allows
switching in and out various algorithms and quantisation steps. Used
for algorithm development.
\*---------------------------------------------------------------------------*/
/*
Copyright (C) 2009 David Rowe
All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License version 2.1, as
published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <math.h>
#include <unistd.h>
#include <getopt.h>
#include "defines.h"
#include "sine.h"
#include "nlp.h"
#include "dump.h"
#include "lpc.h"
#include "lsp.h"
#include "quantise.h"
#include "phase.h"
#include "postfilter.h"
#include "interp.h"
#include "bpf.h"
#include "bpfb.h"
#include "newamp1.h"
#include "lpcnet_freq.h"
#include "sd.h"
/* Synthesises one frame of n_samp samples from *model into Sn_[] (via
   synthesise()), optionally de-emphasises it when prede is non-zero, and
   stores the result in buf[] clipped to +/-32767.  Defined later in this
   file. */
void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain);
/* Writes the usage text for the program to stderr; long_options/num_opts
   describe the option table built in main() and argv[0] supplies the
   program name. */
void print_help(const struct option *long_options, int num_opts, char* argv[]);
/* N_SAMP expands to the identifier n_samp, so it can only be used where a
   variable of that name is in scope (main() declares one from c2const). */
#define N_SAMP n_samp /* quick fix for run time sample rate selection */
/*---------------------------------------------------------------------------*\
MAIN
\*---------------------------------------------------------------------------*/
/*
 * Entry point of the Codec 2 simulation program.
 *
 * Parses the command line with getopt_long().  Every long option stores 1 in
 * an int flag; options that also take an argument are finished off in the
 * "case 0" branch below, where file names are opened and numeric values are
 * converted.  "-o <file>" names the output speech file ("-" selects stdout)
 * and the first non-option argument is the input speech file ("-" selects
 * stdin).
 *
 * Once the options are known the codec constants are created from Fs and
 * framelength_s, the working buffers are declared, the frames are processed,
 * summary statistics are written to stderr and the optional files are closed.
 *
 * Returns 0 on success.  Command line and file-open errors print a message
 * to stderr and terminate the program with exit(1).
 *
 * NOTE(review): three lines in the processing section (each marked below) are
 * syntactically incomplete - text is missing from this copy of the file.
 * They are reproduced unchanged and must be restored from a good copy before
 * this function will build.
 */
int main(int argc, char *argv[])
{
    int Fs = 8000;
    /* All ints that getopt_long() writes through a flag pointer start at 0,
       so they read as "option not given" when tested without the option
       having appeared (dump, for one, is tested at the end of main). */
    int set_fs = 0;
    int lpc_model = 0, order = LPC_ORD;
    int lsp = 0, lspd = 0, lspvq = 0;
    int lspjmv = 0;
    int prede = 0;
    int postfilt = 0;
    int hand_voicing = 0, hi = 0, simlpcpf = 0, modelin=0, modelout=0;
    int lpcpf = 0;
    FILE *fvoicing = NULL;
    int dec = 0;
    int decimate = 1;
    int amread = 0, Woread = 0, pahw = 0;
    int awread = 0;
    int hmread = 0;
    int phase0 = 0;
    int scalar_quant_Wo_e = 0;
    int scalar_quant_Wo_e_low = 0;
    int vector_quant_Wo_e = 0;
    int dump_pitch_e = 0;
    float gain = 1.0;
    int bpf_en = 0;
    int bpfb_en = 0;
    FILE *fam = NULL, *fWo = NULL;
    FILE *faw = NULL;
    FILE *fhm = NULL;
    FILE *fjmv = NULL;
    FILE *flspEWov = NULL;
    FILE *ften_ms_centre = NULL;
    FILE *fmodelout = NULL;
    FILE *fmodelin = NULL;
#ifdef DUMP
    int dump = 0;
#endif
    char out_file[MAX_STR] = "";
    FILE *fout = NULL; /* output speech file */
    int rateK = 0, newamp1vq = 0, rate_K_dec = 0, perframe=0;
    int bands = 0, bands_lower_en = 0;
    float bands_lower = -1E32;
    int K = 20;
    float framelength_s = N_S;
    int lspEWov = 0, rateKWov = 0, first = 0;
    FILE *frateKWov = NULL;
    int ten_ms_centre = 0;
    FILE *fphasenn = NULL;
    FILE *frateK = NULL;
    FILE *frateKin = NULL;
    int rateKout = 0, rateKin = 0;
    FILE *fbands = NULL;
    int bands_resample = 0;
    const char *opt_string = "ho:";
    /* Entries with a non-NULL flag pointer make getopt_long() store 1 in the
       flag and return 0; entries with a NULL flag and val 0 also return 0, so
       both kinds are told apart by name in "case 0" below. */
    struct option long_options[] = {
        { "Fs", required_argument, &set_fs, 1 },
        { "rateK", no_argument, &rateK, 1 },
        { "perframe", no_argument, &perframe, 1 },
        { "newamp1vq", no_argument, &newamp1vq, 1 },
        { "rateKdec", required_argument, &rate_K_dec, 1 },
        { "rateKout", required_argument, &rateKout, 1 },
        { "rateKin", required_argument, &rateKin, 1 },
        { "bands",required_argument, &bands, 1 },
        { "bands_lower",required_argument, &bands_lower_en, 1 },
        { "bands_resample", no_argument, &bands_resample, 1 },
        { "lpc", required_argument, &lpc_model, 1 },
        { "lsp", no_argument, &lsp, 1 },
        { "lspd", no_argument, &lspd, 1 },
        { "lspvq", no_argument, &lspvq, 1 },
        { "lspjmv", no_argument, &lspjmv, 1 },
        { "phase0", no_argument, &phase0, 1 },
        { "postfilter", no_argument, &postfilt, 1 },
        { "hand_voicing", required_argument, &hand_voicing, 1 },
        { "dec", required_argument, &dec, 1 },
        { "hi", no_argument, &hi, 1 },
        { "simlpcpf", no_argument, &simlpcpf, 1 },
        { "lpcpf", no_argument, &lpcpf, 1 },
        { "prede", no_argument, &prede, 1 },
        { "dump_pitch_e", required_argument, &dump_pitch_e, 1 },
        { "sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1 },
        { "sq_pitch_e_low", no_argument, &scalar_quant_Wo_e_low, 1 },
        { "vq_pitch_e", no_argument, &vector_quant_Wo_e, 1 },
        { "rate", required_argument, NULL, 0 },
        { "gain", required_argument, NULL, 0 },
        { "bpf", no_argument, &bpf_en, 1 },
        { "bpfb", no_argument, &bpfb_en, 1 },
        { "amread", required_argument, &amread, 1 },
        { "hmread", required_argument, &hmread, 1 },
        { "awread", required_argument, &awread, 1 },
        { "Woread", required_argument, &Woread, 1 },
        { "pahw", required_argument, &pahw, 1 },
        { "lspEWov", required_argument, &lspEWov, 1 },
        { "rateKWov", required_argument, &rateKWov, 1 },
        { "first", no_argument, &first, 1 },
        { "ten_ms_centre", required_argument, &ten_ms_centre, 1 },
        { "framelength_s", required_argument, NULL, 0 },
        { "modelout", required_argument, &modelout, 1 },
        { "modelin", required_argument, &modelin, 1 },
#ifdef DUMP
        { "dump", required_argument, &dump, 1 },
#endif
        { "help", no_argument, NULL, 'h' },
        { NULL, no_argument, NULL, 0 }
    };
    /* Counts every table entry, including the all-NULL terminator. */
    int num_opts=sizeof(long_options)/sizeof(struct option);

    /*----------------------------------------------------------------*\
                    Interpret Command Line Arguments
    \*----------------------------------------------------------------*/

    if (argc < 2) {
        print_help(long_options, num_opts, argv);
    }

    while(1) {
        int option_index = 0;
        int opt = getopt_long(argc, argv, opt_string,
                              long_options, &option_index);
        if (opt == -1)
            break;

        switch (opt) {
        case 0: {
            /* name of the long option that was just matched */
            const char *opt_name = long_options[option_index].name;

            if (strcmp(opt_name, "Fs") == 0) {
                Fs= atoi(optarg);
                if((Fs != 8000) && (Fs != 16000)) {
                    fprintf(stderr, "Error Fs must be 8000 or 16000\n");
                    exit(1);
                }
            } else if (strcmp(opt_name, "lpc") == 0) {
                order = atoi(optarg);
                /* order sizes several variable length arrays further down,
                   so it has to be at least 1 */
                if (order < 1) {
                    fprintf(stderr, "Error in --lpc, order must be a positive integer\n");
                    exit(1);
                }
#ifdef DUMP
            } else if (strcmp(opt_name, "dump") == 0) {
                if (dump)
                    dump_on(optarg);
#endif
            } else if (strcmp(opt_name, "lsp") == 0
                       || strcmp(opt_name, "lspd") == 0
                       || strcmp(opt_name, "lspvq") == 0) {
                assert(order == LPC_ORD);
            } else if (strcmp(opt_name, "rateKdec") == 0) {
                rate_K_dec = atoi(optarg);
                fprintf(stderr, "rate_K_dec: %d\n", rate_K_dec);
            } else if (strcmp(opt_name, "rateKout") == 0) {
                /* write rate K vectors to file */
                if ((frateK = fopen(optarg,"wb")) == NULL) {
                    fprintf(stderr, "Error opening output rateK file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
                fprintf(stderr, "each record is %d bytes\n", (int)(K*sizeof(float)));
            } else if (strcmp(opt_name, "rateKin") == 0) {
                /* read rate K vectors from file */
                if ((frateKin = fopen(optarg,"rb")) == NULL) {
                    fprintf(stderr, "Error opening input rateK file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
                fprintf(stderr, "each record is %d bytes\n", (int)(K*sizeof(float)));
            } else if (strcmp(opt_name, "bands") == 0) {
                /* write mel spaced band energies to file */
                if ((fbands = fopen(optarg,"wb")) == NULL) {
                    fprintf(stderr, "Error opening bands file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "bands_lower") == 0) {
                bands_lower = atof(optarg);
                fprintf(stderr, "bands_lower: %f\n", bands_lower);
            } else if (strcmp(opt_name, "dec") == 0) {
                decimate = atoi(optarg);
                if ((decimate != 2) && (decimate != 3) && (decimate != 4)) {
                    fprintf(stderr, "Error in --dec, must be 2, 3, or 4\n");
                    exit(1);
                }
                /* These checks run while parsing, so --phase0 and --lpc must
                   come before --dec on the command line. */
                if (!phase0) {
                    fprintf(stderr, "needs --phase0 to resample phase when using --dec\n");
                    exit(1);
                }
                if (!lpc_model) {
                    fprintf(stderr, "needs --lpc [order] to resample amplitudes when using --dec\n");
                    exit(1);
                }
            } else if (strcmp(opt_name, "hand_voicing") == 0) {
                if ((fvoicing = fopen(optarg,"rt")) == NULL) {
                    fprintf(stderr, "Error opening voicing file: %s: %s.\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "Woread") == 0) {
                if ((fWo = fopen(optarg,"rb")) == NULL) {
                    fprintf(stderr, "Error opening float Wo file: %s: %s.\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "amread") == 0) {
                if ((fam = fopen(optarg,"rb")) == NULL) {
                    fprintf(stderr, "Error opening float Am file: %s: %s.\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "hmread") == 0) {
                if ((fhm = fopen(optarg,"rb")) == NULL) {
                    fprintf(stderr, "Error opening float Hm file: %s: %s.\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "awread") == 0) {
                if ((faw = fopen(optarg,"rb")) == NULL) {
                    fprintf(stderr, "Error opening float Aw file: %s: %s.\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "dump_pitch_e") == 0) {
                if ((fjmv = fopen(optarg,"wt")) == NULL) {
                    fprintf(stderr, "Error opening pitch & energy dump file: %s: %s.\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "gain") == 0) {
                gain = atof(optarg);
            } else if (strcmp(opt_name, "framelength_s") == 0) {
                framelength_s = atof(optarg);
            } else if (strcmp(opt_name, "pahw") == 0) {
                /* set up a bunch of arguments instead of having to enter them on cmd line every time */
                char file_name[MAX_STR];

                phase0 = postfilt = amread = hmread = Woread = 1;

                /* The three suffixes have the same length, so one check
                   guarantees that none of the names below is truncated. */
                if (strlen(optarg) + sizeof("_am.out") > sizeof(file_name)) {
                    fprintf(stderr, "Error in --pahw, file name prefix too long: %s\n", optarg);
                    exit(1);
                }

                snprintf(file_name, sizeof(file_name), "%s_am.out", optarg);
                fprintf(stderr, "reading %s", file_name);
                if ((fam = fopen(file_name,"rb")) == NULL) {
                    fprintf(stderr, "Error opening float Am file: %s: %s.\n",
                            file_name, strerror(errno));
                    exit(1);
                }
                snprintf(file_name, sizeof(file_name), "%s_hm.out", optarg);
                fprintf(stderr, " %s", file_name);
                if ((fhm = fopen(file_name,"rb")) == NULL) {
                    fprintf(stderr, "Error opening float Hm file: %s: %s.\n",
                            file_name, strerror(errno));
                    exit(1);
                }
                snprintf(file_name, sizeof(file_name), "%s_Wo.out", optarg);
                fprintf(stderr, " %s\n", file_name);
                if ((fWo = fopen(file_name,"rb")) == NULL) {
                    fprintf(stderr, "Error opening float Wo file: %s: %s.\n",
                            file_name, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "lspEWov") == 0) {
                /* feature file for deep learning experiments */
                lpc_model = 1; phase0 = 1;
                if ((flspEWov = fopen(optarg,"wb")) == NULL) {
                    fprintf(stderr, "Error opening lspEWov float file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "rateKWov") == 0) {
                /* feature file for deep learning experiments */
                rateK = 1; newamp1vq = 1;
                if ((frateKWov = fopen(optarg,"wb")) == NULL) {
                    fprintf(stderr, "Error opening rateKWov float file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "ten_ms_centre") == 0) {
                /* dump 10ms of audio centred on analysis frame to check time alignment with
                   16 kHz source audio */
                ten_ms_centre = 1;
                if ((ften_ms_centre = fopen(optarg,"wb")) == NULL) {
                    fprintf(stderr, "Error opening ten_ms_centre short file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
            } else if (strcmp(opt_name, "modelout") == 0) {
                /* write model records to file or stdout */
                modelout = 1;
                if (strcmp(optarg, "-") == 0) fmodelout = stdout;
                else if ((fmodelout = fopen(optarg,"wb")) == NULL) {
                    fprintf(stderr, "Error opening modelout file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
                fprintf(stderr, "each model record is %d bytes\n", (int)sizeof(MODEL));
            } else if (strcmp(opt_name, "modelin") == 0) {
                /* read model records from file or stdin */
                modelin = 1;
                if (strcmp(optarg, "-") == 0) fmodelin = stdin;
                else if ((fmodelin = fopen(optarg,"rb")) == NULL) {
                    fprintf(stderr, "Error opening modelin file: %s: %s\n",
                            optarg, strerror(errno));
                    exit(1);
                }
                fprintf(stderr, "each model record is %d bytes\n", (int)sizeof(MODEL));
            } else if (strcmp(opt_name, "rate") == 0) {
                /* each rate is a preset that switches on a fixed set of flags */
                if(strcmp(optarg,"3200") == 0) {
                    lpc_model = 1;
                    scalar_quant_Wo_e = 1;
                    lspd = 1;
                    phase0 = 1;
                    postfilt = 1;
                    decimate = 1;
                    lpcpf = 1;
                } else if(strcmp(optarg,"2400") == 0) {
                    lpc_model = 1;
                    vector_quant_Wo_e = 1;
                    lsp = 1;
                    phase0 = 1;
                    postfilt = 1;
                    decimate = 2;
                    lpcpf = 1;
                } else if(strcmp(optarg,"1400") == 0) {
                    lpc_model = 1;
                    vector_quant_Wo_e = 1;
                    lsp = 1;
                    phase0 = 1;
                    postfilt = 1;
                    decimate = 4;
                    lpcpf = 1;
                } else if(strcmp(optarg,"1300") == 0) {
                    lpc_model = 1;
                    scalar_quant_Wo_e = 1;
                    lsp = 1;
                    phase0 = 1;
                    postfilt = 1;
                    decimate = 4;
                    lpcpf = 1;
                } else if(strcmp(optarg,"1200") == 0) {
                    lpc_model = 1;
                    scalar_quant_Wo_e = 1;
                    lspjmv = 1;
                    phase0 = 1;
                    postfilt = 1;
                    decimate = 4;
                    lpcpf = 1;
                } else {
                    fprintf(stderr, "Error: invalid output rate (3200|2400|1400|1300|1200) %s\n", optarg);
                    exit(1);
                }
            }
            break;
        }
        case 'h':
            print_help(long_options, num_opts, argv);
            break;
        case 'o':
            if (strcmp(optarg, "-") == 0) fout = stdout;
            else if ((fout = fopen(optarg,"wb")) == NULL) {
                fprintf(stderr, "Error opening output speech file: %s: %s.\n",
                        optarg, strerror(errno));
                exit(1);
            }
            /* bounded copy: an over-long name is truncated, not overflowed */
            snprintf(out_file, sizeof(out_file), "%s", optarg);
            break;
        default:
            /* getopt_long() returns '?' for an unrecognised option or a
               missing argument and has already printed a diagnostic;
               parsing simply carries on. */
            break;
        }
    }

    /* Input file */
    FILE *fin; /* input speech file */
    if (optind >= argc) {
        /* no non-option argument left, so argv[optind] would be NULL */
        fprintf(stderr, "Error: no input speech file specified\n");
        exit(1);
    }
    if (strcmp(argv[optind], "-") == 0) fin = stdin;
    else if ((fin = fopen(argv[optind],"rb")) == NULL) {
        fprintf(stderr, "Error opening input speech file: %s: %s.\n",
                argv[optind], strerror(errno));
        exit(1);
    }

    C2CONST c2const = c2const_create(Fs, framelength_s);
    int n_samp = c2const.n_samp;
    int m_pitch = c2const.m_pitch;
    short buf[N_SAMP]; /* input/output buffer */
    float buf_float[N_SAMP];
    float Sn[m_pitch]; /* float input speech samples */
    float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */
    COMP Sw[FFT_ENC]; /* DFT of Sn[] */
    codec2_fft_cfg fft_fwd_cfg;
    codec2_fftr_cfg fftr_fwd_cfg;
    codec2_fftr_cfg fftr_inv_cfg;
    float w[m_pitch]; /* time domain hamming window */
    float W[FFT_ENC]; /* DFT of w[] */
    MODEL model;
    float Pn[2*N_SAMP]; /* trapezoidal synthesis window */
    float Sn_[2*N_SAMP]; /* synthesised speech */
    int i,m; /* loop variable */
    int frames;
    float prev_f0;
    float pitch;
    float snr;
    float sum_snr;
    float pre_mem = 0.0, de_mem = 0.0;
    float ak[1+order];
    // COMP Sw_[FFT_ENC];
    // COMP Ew[FFT_ENC];
    float ex_phase[MAX_AMP+1];
    float bg_est = 0.0;
    MODEL prev_model;
    float lsps[order];
    float e, prev_e;
    int lsp_indexes[order];
    float lsps_[order];
    float Woe_[2];
    float lsps_dec[4][order], e_dec[4], weight, weight_inc, ak_dec[4][order];
    MODEL model_dec[4], prev_model_dec;
    float prev_lsps_dec[order], prev_e_dec;
    void *nlp_states;
    float hpf_states[2];
#if 0
    struct PEXP *pexp = NULL;
    struct AEXP *aexp = NULL;
#endif
    float bpf_buf[BPF_N+N_SAMP];
    COMP Aw[FFT_ENC];
    COMP H[MAX_AMP];
    float sd_sum = 0.0; int sd_frames = 0;

    /* NOTE(review): the next line is incomplete - everything between the
       loop initialiser and the bands_lower comparison is missing from this
       copy of the file. */
    for(i=0; i bands_lower)
        {
            /* the write is kept outside assert() so it still happens when
               the program is built with NDEBUG */
            size_t n_written = fwrite(bandE, sizeof(float), nbands, fbands);
            assert(n_written == (size_t)nbands);
            (void)n_written;
        }
    // optionally reconstruct {Am} by linear interpolation of band energies,
    // this doesn't sound very good
    if (bands_resample)
        resample_rate_L(&c2const, &model, &bandE[1], &freqkHz[1], nbands-2);
    }

    /*------------------------------------------------------------*\
            Optional newamp1 simulation, as used in 700C
    \*------------------------------------------------------------*/

    if (rateK) {
        float rate_K_vec[K];
        resample_const_rate_f(&c2const, &model, rate_K_vec, rate_K_sample_freqs_kHz, K);
        if (frateK != NULL) {
            /* I/O kept outside assert() so NDEBUG builds still write */
            size_t n_written = fwrite(rate_K_vec, sizeof(float), K, frateK);
            assert(n_written == (size_t)K);
            (void)n_written;
        }
        if (frateKin != NULL) {
            /* I/O kept outside assert() so NDEBUG builds still read */
            size_t n_read = fread(rate_K_vec, sizeof(float), K, frateKin);
            assert(n_read == (size_t)K);
            (void)n_read;
            /* apply newamp1 postfilter - this helped male samples with VQVAE work */
            float sum = 0.0;
            /* NOTE(review): the next line is incomplete - text is missing
               between the loop initialiser and the bands_lower comparison. */
            for(int k=0; k bands_lower)
                fwrite(&model_dec[i],sizeof(MODEL),1,fmodelout);
        }
        else
            fwrite(&model_dec[i],sizeof(MODEL),1,fmodelout);
    }
    }

    /* update memories for next frame ----------------------------*/

    prev_model_dec = model_dec[decimate-1];
    prev_e_dec = e_dec[decimate-1];
    /* NOTE(review): the next line is incomplete - text is missing between
       the loop initialiser and the tail of an fprintf() format string. */
    for(i=0; i{Am} SNR av: %5.2f dB over %d frames\n", sum_snr/frames, frames);
        if (lsp || lspd || lspjmv)
            fprintf(stderr, "LSP quantiser SD: %5.2f dB*dB over %d frames\n", sd_sum/sd_frames, sd_frames);
    }
    if (newamp1vq) {
        fprintf(stderr, "var: %3.2f dB*dB\n", se/nse);
    }

#ifdef DUMP
    if (dump)
        dump_off();
#endif

    /* fvoicing is only non-NULL when --hand_voicing opened it */
    if (fvoicing != NULL)
        fclose(fvoicing);
    nlp_destroy(nlp_states);

    if (fam != NULL) fclose(fam);
    if (fWo != NULL) fclose(fWo);
    if (faw != NULL) fclose(faw);
    if (fhm != NULL) fclose(fhm);
    if (fjmv != NULL) fclose(fjmv);
    if (flspEWov != NULL) fclose(flspEWov);
    if (fphasenn != NULL) fclose(fphasenn);
    if (frateK != NULL) fclose(frateK);
    if (frateKin != NULL) fclose(frateKin);
    if (ften_ms_centre != NULL) fclose(ften_ms_centre);
    if (fmodelout != NULL) fclose(fmodelout);
    if (fbands != NULL) fclose(fbands);
    if (frateKWov != NULL) fclose(frateKWov);

    return 0;
}
/*
 * Synthesise one frame of speech and convert it to 16 bit samples.
 *
 *   n_samp        number of samples in the frame
 *   fftr_inv_cfg  inverse real FFT configuration handed to synthesise()
 *   buf           output, n_samp samples clipped to +/-32767
 *   model         model parameters for the frame, handed to synthesise()
 *   Sn_           synthesised speech buffer, updated in place
 *   Pn            synthesis window handed to synthesise()
 *   prede         non-zero to run de_emp() over the frame
 *   de_mem        de-emphasis filter memory, passed to de_emp()
 *   gain          scale factor applied to each sample before clipping
 *
 * NOTE(review): the loop header and the gain scaling were missing from the
 * damaged copy of this function and have been reconstructed from the
 * parameter list, the brace structure and the surviving clip branches -
 * confirm against a good copy of the file.
 */
void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[],
                     float Pn[], int prede, float *de_mem, float gain)
{
    int i;

    synthesise(n_samp, fftr_inv_cfg, Sn_, model, Pn, 1);
    if (prede)
        de_emp(Sn_, Sn_, de_mem, n_samp);

    for (i = 0; i < n_samp; i++) {
        Sn_[i] *= gain;
        /* saturate rather than wrap when converting to short */
        if (Sn_[i] > 32767.0)
            buf[i] = 32767;
        else if (Sn_[i] < -32767.0)
            buf[i] = -32767;
        else
            buf[i] = Sn_[i];
    }
}
void print_help(const struct option* long_options, int num_opts, char* argv[])
{
int i;
char *option_parameters;
fprintf(stderr, "\nCodec2 - low bit rate speech codec - Simulation Program\n"
"\thttp://rowetel.com/codec2.html\n\n"
"usage: %s [OPTIONS] \n\n"
"Options:\n"
"\t-o \n", argv[0]);
for(i=0; i