diff options
| author | drowe67 <[email protected]> | 2023-07-14 12:36:50 +0930 |
|---|---|---|
| committer | David Rowe <[email protected]> | 2023-07-14 12:36:50 +0930 |
| commit | b86e88413d4c6ec428aaedb147f7675f28882fe4 (patch) | |
| tree | ce360925856e25d4343d59a37e2e6bac142d3752 /src/c2sim.c | |
| parent | 0c2e969cfbe85548801eeb20ad8113969604892a (diff) | |
clang-format -i applied to src unittest misc
Diffstat (limited to 'src/c2sim.c')
| -rw-r--r-- | src/c2sim.c | 2105 |
1 files changed, 1062 insertions, 1043 deletions
diff --git a/src/c2sim.c b/src/c2sim.c index 01ebba1..cf23d3a 100644 --- a/src/c2sim.c +++ b/src/c2sim.c @@ -28,1141 +28,1160 @@ */ #include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <errno.h> -#include <math.h> #include <unistd.h> -#include <getopt.h> +#include "bpf.h" +#include "bpfb.h" #include "defines.h" -#include "sine.h" -#include "nlp.h" #include "dump.h" +#include "interp.h" #include "lpc.h" +#include "lpcnet_freq.h" #include "lsp.h" -#include "quantise.h" +#include "newamp1.h" +#include "nlp.h" #include "phase.h" #include "postfilter.h" -#include "interp.h" -#include "bpf.h" -#include "bpfb.h" -#include "newamp1.h" -#include "lpcnet_freq.h" +#include "quantise.h" #include "sd.h" +#include "sine.h" -void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain); -void print_help(const struct option *long_options, int num_opts, char* argv[]); +void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], + MODEL *model, float Sn_[], float Pn[], int prede, + float *de_mem, float gain); +void print_help(const struct option *long_options, int num_opts, char *argv[]); -#define N_SAMP n_samp /* quick fix for run time sample rate selection */ +#define N_SAMP n_samp /* quick fix for run time sample rate selection */ /*---------------------------------------------------------------------------*\ - MAIN + MAIN \*---------------------------------------------------------------------------*/ -int main(int argc, char *argv[]) -{ - - int Fs = 8000; - int set_fs; - - int lpc_model = 0, order = LPC_ORD; - int lsp = 0, lspd = 0, lspvq = 0; - int lspjmv = 0; - int prede = 0; - int postfilt; - int hand_voicing = 0, hi = 0, simlpcpf = 0, modelin=0, modelout=0; - int lpcpf = 0; - FILE *fvoicing = 0; - int dec; - int decimate = 1; - int amread, Woread, pahw; - int awread; - int hmread; - int phase0 = 0; - int scalar_quant_Wo_e = 0; - int scalar_quant_Wo_e_low = 0; - int vector_quant_Wo_e = 0; - int dump_pitch_e = 0; - float gain = 1.0; - int bpf_en = 0; - int bpfb_en = 0; - FILE *fam = NULL, *fWo = NULL; - FILE *faw = NULL; - FILE *fhm = NULL; - FILE *fjmv = NULL; - FILE *flspEWov = NULL; - FILE *ften_ms_centre = NULL; - FILE *fmodelout = NULL; - FILE *fmodelin = NULL; - #ifdef DUMP - int dump; - #endif - char out_file[MAX_STR]; - FILE *fout = NULL; /* output speech file */ - int rateK = 0, newamp1vq = 0, rate_K_dec = 0, perframe=0; - int bands = 0, bands_lower_en; - float bands_lower = -1E32; - int K = 20; - float framelength_s = N_S; - int lspEWov = 0, rateKWov = 0, first = 0; - FILE *frateKWov = NULL; - int ten_ms_centre = 0; - FILE *fphasenn = NULL; - FILE *frateK = NULL; - FILE *frateKin = NULL; - int rateKout, rateKin; - FILE *fbands = NULL; - int bands_resample = 0; - - char* opt_string = "ho:"; - struct option long_options[] = { - { "Fs", required_argument, &set_fs, 1 }, - { "rateK", no_argument, &rateK, 1 }, - { "perframe", no_argument, &perframe, 1 }, - { "newamp1vq", no_argument, &newamp1vq, 1 }, - { "rateKdec", required_argument, &rate_K_dec, 1 }, - { "rateKout", required_argument, &rateKout, 1 }, - { "rateKin", required_argument, &rateKin, 1 }, - { "bands",required_argument, &bands, 1 }, - { "bands_lower",required_argument, &bands_lower_en, 1 }, - { "bands_resample", no_argument, &bands_resample, 1 }, - { "lpc", required_argument, &lpc_model, 1 }, - { "lsp", no_argument, &lsp, 1 }, - { "lspd", no_argument, &lspd, 1 }, - { "lspvq", no_argument, &lspvq, 1 }, - { "lspjmv", no_argument, &lspjmv, 1 }, - { "phase0", no_argument, &phase0, 1 }, - { "postfilter", no_argument, &postfilt, 1 }, - { "hand_voicing", required_argument, &hand_voicing, 1 }, - { "dec", required_argument, &dec, 1 }, - { "hi", no_argument, &hi, 1 }, - { "simlpcpf", no_argument, &simlpcpf, 1 }, - { "lpcpf", no_argument, &lpcpf, 1 }, - { "prede", no_argument, &prede, 1 }, - { "dump_pitch_e", required_argument, &dump_pitch_e, 1 }, - { "sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1 }, - { "sq_pitch_e_low", no_argument, &scalar_quant_Wo_e_low, 1 }, - { "vq_pitch_e", no_argument, &vector_quant_Wo_e, 1 }, - { "rate", required_argument, NULL, 0 }, - { "gain", required_argument, NULL, 0 }, - { "bpf", no_argument, &bpf_en, 1 }, - { "bpfb", no_argument, &bpfb_en, 1 }, - { "amread", required_argument, &amread, 1 }, - { "hmread", required_argument, &hmread, 1 }, - { "awread", required_argument, &awread, 1 }, - { "Woread", required_argument, &Woread, 1 }, - { "pahw", required_argument, &pahw, 1 }, - { "lspEWov", required_argument, &lspEWov, 1 }, - { "rateKWov", required_argument, &rateKWov, 1 }, - { "first", no_argument, &first, 1 }, - { "ten_ms_centre", required_argument, &ten_ms_centre, 1 }, - { "framelength_s", required_argument, NULL, 0 }, - { "modelout", required_argument, &modelout, 1 }, - { "modelin", required_argument, &modelin, 1 }, - #ifdef DUMP - { "dump", required_argument, &dump, 1 }, - #endif - { "help", no_argument, NULL, 'h' }, - { NULL, no_argument, NULL, 0 } - }; - int num_opts=sizeof(long_options)/sizeof(struct option); - - /*----------------------------------------------------------------*\ - - Interpret Command Line Arguments - - \*----------------------------------------------------------------*/ - - if (argc < 2) { - print_help(long_options, num_opts, argv); - } - - while(1) { - int option_index = 0; - int opt = getopt_long(argc, argv, opt_string, - long_options, &option_index); - if (opt == -1) - break; - switch (opt) { - case 0: - if(strcmp(long_options[option_index].name, "Fs") == 0) { - Fs= atoi(optarg); - if((Fs != 8000) && (Fs != 16000)) { - fprintf(stderr, "Error Fs must be 8000 or 16000\n"); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "lpc") == 0) { - order = atoi(optarg); - #ifdef DUMP - } else if(strcmp(long_options[option_index].name, "dump") == 0) { - if (dump) - dump_on(optarg); - #endif - } else if(strcmp(long_options[option_index].name, "lsp") == 0 - || strcmp(long_options[option_index].name, "lspd") == 0 - || strcmp(long_options[option_index].name, "lspvq") == 0) { - assert(order == LPC_ORD); - } else if(strcmp(long_options[option_index].name, "rateKdec") == 0) { - rate_K_dec = atoi(optarg); - fprintf(stderr, "rate_K_dec: %d\n", rate_K_dec); - } else if(strcmp(long_options[option_index].name, "rateKout") == 0) { - /* read model records from file or stdin */ - if ((frateK = fopen(optarg,"wb")) == NULL) { - fprintf(stderr, "Error opening output rateK file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - fprintf(stderr, "each record is %d bytes\n", (int)(K*sizeof(float))); - } else if(strcmp(long_options[option_index].name, "rateKin") == 0) { - /* read model records from file or stdin */ - if ((frateKin = fopen(optarg,"rb")) == NULL) { - fprintf(stderr, "Error opening input rateK file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - fprintf(stderr, "each record is %d bytes\n", (int)(K*sizeof(float))); - } else if(strcmp(long_options[option_index].name, "bands") == 0) { - /* write mel spaced band energies to file or stdout */ - if ((fbands = fopen(optarg,"wb")) == NULL) { - fprintf(stderr, "Error opening bands file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "bands_lower") == 0) { - bands_lower = atof(optarg); - fprintf(stderr, "bands_lower: %f\n", bands_lower); - } else if(strcmp(long_options[option_index].name, "dec") == 0) { - - decimate = atoi(optarg); - if ((decimate != 2) && (decimate != 3) && (decimate != 4)) { - fprintf(stderr, "Error in --dec, must be 2, 3, or 4\n"); - exit(1); - } - - if (!phase0) { - fprintf(stderr, "needs --phase0 to resample phase when using --dec\n"); - exit(1); - } - if (!lpc_model) { - fprintf(stderr, "needs --lpc [order] to resample amplitudes when using --dec\n"); - exit(1); - } - - } else if(strcmp(long_options[option_index].name, "hand_voicing") == 0) { - if ((fvoicing = fopen(optarg,"rt")) == NULL) { - fprintf(stderr, "Error opening voicing file: %s: %s.\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "Woread") == 0) { - if ((fWo = fopen(optarg,"rb")) == NULL) { - fprintf(stderr, "Error opening float Wo file: %s: %s.\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "amread") == 0) { - if ((fam = fopen(optarg,"rb")) == NULL) { - fprintf(stderr, "Error opening float Am file: %s: %s.\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "hmread") == 0) { - if ((fhm = fopen(optarg,"rb")) == NULL) { - fprintf(stderr, "Error opening float Hm file: %s: %s.\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "awread") == 0) { - if ((faw = fopen(optarg,"rb")) == NULL) { - fprintf(stderr, "Error opening float Aw file: %s: %s.\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "dump_pitch_e") == 0) { - if ((fjmv = fopen(optarg,"wt")) == NULL) { - fprintf(stderr, "Error opening pitch & energy dump file: %s: %s.\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "gain") == 0) { - gain = atof(optarg); - } else if(strcmp(long_options[option_index].name, "framelength_s") == 0) { - framelength_s = atof(optarg); - } else if(strcmp(long_options[option_index].name, "pahw") == 0) { - - /* set up a bunch of arguments instead of having to enter them on cmd line every time */ - - phase0 = postfilt = amread = hmread = Woread = 1; - char file_name[MAX_STR]; - sprintf(file_name, "%s_am.out", optarg); - fprintf(stderr, "reading %s", file_name); - if ((fam = fopen(file_name,"rb")) == NULL) { - fprintf(stderr, "Error opening float Am file: %s: %s.\n", - file_name, strerror(errno)); - exit(1); - } - sprintf(file_name, "%s_hm.out", optarg); - fprintf(stderr, " %s", file_name); - if ((fhm = fopen(file_name,"rb")) == NULL) { - fprintf(stderr, "Error opening float Hm file: %s: %s.\n", - file_name, strerror(errno)); - exit(1); - } - sprintf(file_name, "%s_Wo.out", optarg); - fprintf(stderr, " %s\n", file_name); - if ((fWo = fopen(file_name,"rb")) == NULL) { - fprintf(stderr, "Error opening float Wo file: %s: %s.\n", - file_name, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "lspEWov") == 0) { - /* feature file for deep learning experiments */ - lpc_model = 1; phase0 = 1; - if ((flspEWov = fopen(optarg,"wb")) == NULL) { - fprintf(stderr, "Error opening lspEWov float file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "rateKWov") == 0) { - /* feature file for deep learning experiments */ - rateK = 1; newamp1vq = 1; - if ((frateKWov = fopen(optarg,"wb")) == NULL) { - fprintf(stderr, "Error opening rateKWov float file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "ten_ms_centre") == 0) { - /* dump 10ms of audio centred on analysis frame to check time alignment with - 16 kHz source audio */ - ten_ms_centre = 1; - if ((ften_ms_centre = fopen(optarg,"wb")) == NULL) { - fprintf(stderr, "Error opening ten_ms_centre short file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - } else if(strcmp(long_options[option_index].name, "modelout") == 0) { - /* write model records to file or stdout */ - modelout = 1; - if (strcmp(optarg, "-") == 0) fmodelout = stdout; - else if ((fmodelout = fopen(optarg,"wb")) == NULL) { - fprintf(stderr, "Error opening modelout file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - fprintf(stderr, "each model record is %d bytes\n", (int)sizeof(MODEL)); - } else if(strcmp(long_options[option_index].name, "modelin") == 0) { - /* read model records from file or stdin */ - modelin = 1; - if (strcmp(optarg, "-") == 0) fmodelin = stdin; - else if ((fmodelin = fopen(optarg,"rb")) == NULL) { - fprintf(stderr, "Error opening modelin file: %s: %s\n", - optarg, strerror(errno)); - exit(1); - } - fprintf(stderr, "each model record is %d bytes\n", (int)sizeof(MODEL)); - } else if(strcmp(long_options[option_index].name, "rate") == 0) { - if(strcmp(optarg,"3200") == 0) { - lpc_model = 1; - scalar_quant_Wo_e = 1; - lspd = 1; - phase0 = 1; - postfilt = 1; - decimate = 1; - lpcpf = 1; - } else if(strcmp(optarg,"2400") == 0) { - lpc_model = 1; - vector_quant_Wo_e = 1; - lsp = 1; - phase0 = 1; - postfilt = 1; - decimate = 2; - lpcpf = 1; - } else if(strcmp(optarg,"1400") == 0) { - lpc_model = 1; - vector_quant_Wo_e = 1; - lsp = 1; - phase0 = 1; - postfilt = 1; - decimate = 4; - lpcpf = 1; - } else if(strcmp(optarg,"1300") == 0) { - lpc_model = 1; - scalar_quant_Wo_e = 1; - lsp = 1; - phase0 = 1; - postfilt = 1; - decimate = 4; - lpcpf = 1; - } else if(strcmp(optarg,"1200") == 0) { - lpc_model = 1; - scalar_quant_Wo_e = 1; - lspjmv = 1; - phase0 = 1; - postfilt = 1; - decimate = 4; - lpcpf = 1; - } else { - fprintf(stderr, "Error: invalid output rate (3200|2400|1400|1200) %s\n", optarg); - exit(1); - } - } - break; - - case 'h': - print_help(long_options, num_opts, argv); - break; - - case 'o': - if (strcmp(optarg, "-") == 0) fout = stdout; - else if ((fout = fopen(optarg,"wb")) == NULL) { - fprintf(stderr, "Error opening output speech file: %s: %s.\n", - optarg, strerror(errno)); - exit(1); - } - strcpy(out_file,optarg); - break; - - default: - /* This will never be reached */ - break; +int main(int argc, char *argv[]) { + int Fs = 8000; + int set_fs; + + int lpc_model = 0, order = LPC_ORD; + int lsp = 0, lspd = 0, lspvq = 0; + int lspjmv = 0; + int prede = 0; + int postfilt; + int hand_voicing = 0, hi = 0, simlpcpf = 0, modelin = 0, modelout = 0; + int lpcpf = 0; + FILE *fvoicing = 0; + int dec; + int decimate = 1; + int amread, Woread, pahw; + int awread; + int hmread; + int phase0 = 0; + int scalar_quant_Wo_e = 0; + int scalar_quant_Wo_e_low = 0; + int vector_quant_Wo_e = 0; + int dump_pitch_e = 0; + float gain = 1.0; + int bpf_en = 0; + int bpfb_en = 0; + FILE *fam = NULL, *fWo = NULL; + FILE *faw = NULL; + FILE *fhm = NULL; + FILE *fjmv = NULL; + FILE *flspEWov = NULL; + FILE *ften_ms_centre = NULL; + FILE *fmodelout = NULL; + FILE *fmodelin = NULL; +#ifdef DUMP + int dump; +#endif + char out_file[MAX_STR]; + FILE *fout = NULL; /* output speech file */ + int rateK = 0, newamp1vq = 0, rate_K_dec = 0, perframe = 0; + int bands = 0, bands_lower_en; + float bands_lower = -1E32; + int K = 20; + float framelength_s = N_S; + int lspEWov = 0, rateKWov = 0, first = 0; + FILE *frateKWov = NULL; + int ten_ms_centre = 0; + FILE *fphasenn = NULL; + FILE *frateK = NULL; + FILE *frateKin = NULL; + int rateKout, rateKin; + FILE *fbands = NULL; + int bands_resample = 0; + + char *opt_string = "ho:"; + struct option long_options[] = { + {"Fs", required_argument, &set_fs, 1}, + {"rateK", no_argument, &rateK, 1}, + {"perframe", no_argument, &perframe, 1}, + {"newamp1vq", no_argument, &newamp1vq, 1}, + {"rateKdec", required_argument, &rate_K_dec, 1}, + {"rateKout", required_argument, &rateKout, 1}, + {"rateKin", required_argument, &rateKin, 1}, + {"bands", required_argument, &bands, 1}, + {"bands_lower", required_argument, &bands_lower_en, 1}, + {"bands_resample", no_argument, &bands_resample, 1}, + {"lpc", required_argument, &lpc_model, 1}, + {"lsp", no_argument, &lsp, 1}, + {"lspd", no_argument, &lspd, 1}, + {"lspvq", no_argument, &lspvq, 1}, + {"lspjmv", no_argument, &lspjmv, 1}, + {"phase0", no_argument, &phase0, 1}, + {"postfilter", no_argument, &postfilt, 1}, + {"hand_voicing", required_argument, &hand_voicing, 1}, + {"dec", required_argument, &dec, 1}, + {"hi", no_argument, &hi, 1}, + {"simlpcpf", no_argument, &simlpcpf, 1}, + {"lpcpf", no_argument, &lpcpf, 1}, + {"prede", no_argument, &prede, 1}, + {"dump_pitch_e", required_argument, &dump_pitch_e, 1}, + {"sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1}, + {"sq_pitch_e_low", no_argument, &scalar_quant_Wo_e_low, 1}, + {"vq_pitch_e", no_argument, &vector_quant_Wo_e, 1}, + {"rate", required_argument, NULL, 0}, + {"gain", required_argument, NULL, 0}, + {"bpf", no_argument, &bpf_en, 1}, + {"bpfb", no_argument, &bpfb_en, 1}, + {"amread", required_argument, &amread, 1}, + {"hmread", required_argument, &hmread, 1}, + {"awread", required_argument, &awread, 1}, + {"Woread", required_argument, &Woread, 1}, + {"pahw", required_argument, &pahw, 1}, + {"lspEWov", required_argument, &lspEWov, 1}, + {"rateKWov", required_argument, &rateKWov, 1}, + {"first", no_argument, &first, 1}, + {"ten_ms_centre", required_argument, &ten_ms_centre, 1}, + {"framelength_s", required_argument, NULL, 0}, + {"modelout", required_argument, &modelout, 1}, + {"modelin", required_argument, &modelin, 1}, +#ifdef DUMP + {"dump", required_argument, &dump, 1}, +#endif + {"help", no_argument, NULL, 'h'}, + {NULL, no_argument, NULL, 0}}; + int num_opts = sizeof(long_options) / sizeof(struct option); + + /*----------------------------------------------------------------*\ + + Interpret Command Line Arguments + + \*----------------------------------------------------------------*/ + + if (argc < 2) { + print_help(long_options, num_opts, argv); + } + + while (1) { + int option_index = 0; + int opt = getopt_long(argc, argv, opt_string, long_options, &option_index); + if (opt == -1) break; + switch (opt) { + case 0: + if (strcmp(long_options[option_index].name, "Fs") == 0) { + Fs = atoi(optarg); + if ((Fs != 8000) && (Fs != 16000)) { + fprintf(stderr, "Error Fs must be 8000 or 16000\n"); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "lpc") == 0) { + order = atoi(optarg); +#ifdef DUMP + } else if (strcmp(long_options[option_index].name, "dump") == 0) { + if (dump) dump_on(optarg); +#endif + } else if (strcmp(long_options[option_index].name, "lsp") == 0 || + strcmp(long_options[option_index].name, "lspd") == 0 || + strcmp(long_options[option_index].name, "lspvq") == 0) { + assert(order == LPC_ORD); + } else if (strcmp(long_options[option_index].name, "rateKdec") == 0) { + rate_K_dec = atoi(optarg); + fprintf(stderr, "rate_K_dec: %d\n", rate_K_dec); + } else if (strcmp(long_options[option_index].name, "rateKout") == 0) { + /* read model records from file or stdin */ + if ((frateK = fopen(optarg, "wb")) == NULL) { + fprintf(stderr, "Error opening output rateK file: %s: %s\n", optarg, + strerror(errno)); + exit(1); + } + fprintf(stderr, "each record is %d bytes\n", + (int)(K * sizeof(float))); + } else if (strcmp(long_options[option_index].name, "rateKin") == 0) { + /* read model records from file or stdin */ + if ((frateKin = fopen(optarg, "rb")) == NULL) { + fprintf(stderr, "Error opening input rateK file: %s: %s\n", optarg, + strerror(errno)); + exit(1); + } + fprintf(stderr, "each record is %d bytes\n", + (int)(K * sizeof(float))); + } else if (strcmp(long_options[option_index].name, "bands") == 0) { + /* write mel spaced band energies to file or stdout */ + if ((fbands = fopen(optarg, "wb")) == NULL) { + fprintf(stderr, "Error opening bands file: %s: %s\n", optarg, + strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "bands_lower") == + 0) { + bands_lower = atof(optarg); + fprintf(stderr, "bands_lower: %f\n", bands_lower); + } else if (strcmp(long_options[option_index].name, "dec") == 0) { + decimate = atoi(optarg); + if ((decimate != 2) && (decimate != 3) && (decimate != 4)) { + fprintf(stderr, "Error in --dec, must be 2, 3, or 4\n"); + exit(1); + } + + if (!phase0) { + fprintf(stderr, + "needs --phase0 to resample phase when using --dec\n"); + exit(1); + } + if (!lpc_model) { + fprintf(stderr, + "needs --lpc [order] to resample amplitudes when using " + "--dec\n"); + exit(1); + } + + } else if (strcmp(long_options[option_index].name, "hand_voicing") == + 0) { + if ((fvoicing = fopen(optarg, "rt")) == NULL) { + fprintf(stderr, "Error opening voicing file: %s: %s.\n", optarg, + strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "Woread") == 0) { + if ((fWo = fopen(optarg, "rb")) == NULL) { + fprintf(stderr, "Error opening float Wo file: %s: %s.\n", optarg, + strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "amread") == 0) { + if ((fam = fopen(optarg, "rb")) == NULL) { + fprintf(stderr, "Error opening float Am file: %s: %s.\n", optarg, + strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "hmread") == 0) { + if ((fhm = fopen(optarg, "rb")) == NULL) { + fprintf(stderr, "Error opening float Hm file: %s: %s.\n", optarg, + strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "awread") == 0) { + if ((faw = fopen(optarg, "rb")) == NULL) { + fprintf(stderr, "Error opening float Aw file: %s: %s.\n", optarg, + strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "dump_pitch_e") == + 0) { + if ((fjmv = fopen(optarg, "wt")) == NULL) { + fprintf(stderr, "Error opening pitch & energy dump file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "gain") == 0) { + gain = atof(optarg); + } else if (strcmp(long_options[option_index].name, "framelength_s") == + 0) { + framelength_s = atof(optarg); + } else if (strcmp(long_options[option_index].name, "pahw") == 0) { + /* set up a bunch of arguments instead of having to enter them on cmd + * line every time */ + + phase0 = postfilt = amread = hmread = Woread = 1; + char file_name[MAX_STR]; + sprintf(file_name, "%s_am.out", optarg); + fprintf(stderr, "reading %s", file_name); + if ((fam = fopen(file_name, "rb")) == NULL) { + fprintf(stderr, "Error opening float Am file: %s: %s.\n", file_name, + strerror(errno)); + exit(1); + } + sprintf(file_name, "%s_hm.out", optarg); + fprintf(stderr, " %s", file_name); + if ((fhm = fopen(file_name, "rb")) == NULL) { + fprintf(stderr, "Error opening float Hm file: %s: %s.\n", file_name, + strerror(errno)); + exit(1); + } + sprintf(file_name, "%s_Wo.out", optarg); + fprintf(stderr, " %s\n", file_name); + if ((fWo = fopen(file_name, "rb")) == NULL) { + fprintf(stderr, "Error opening float Wo file: %s: %s.\n", file_name, + strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "lspEWov") == 0) { + /* feature file for deep learning experiments */ + lpc_model = 1; + phase0 = 1; + if ((flspEWov = fopen(optarg, "wb")) == NULL) { + fprintf(stderr, "Error opening lspEWov float file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "rateKWov") == 0) { + /* feature file for deep learning experiments */ + rateK = 1; + newamp1vq = 1; + if ((frateKWov = fopen(optarg, "wb")) == NULL) { + fprintf(stderr, "Error opening rateKWov float file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "ten_ms_centre") == + 0) { + /* dump 10ms of audio centred on analysis frame to check time + alignment with 16 kHz source audio */ + ten_ms_centre = 1; + if ((ften_ms_centre = fopen(optarg, "wb")) == NULL) { + fprintf(stderr, "Error opening ten_ms_centre short file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + } else if (strcmp(long_options[option_index].name, "modelout") == 0) { + /* write model records to file or stdout */ + modelout = 1; + if (strcmp(optarg, "-") == 0) + fmodelout = stdout; + else if ((fmodelout = fopen(optarg, "wb")) == NULL) { + fprintf(stderr, "Error opening modelout file: %s: %s\n", optarg, + strerror(errno)); + exit(1); + } + fprintf(stderr, "each model record is %d bytes\n", + (int)sizeof(MODEL)); + } else if (strcmp(long_options[option_index].name, "modelin") == 0) { + /* read model records from file or stdin */ + modelin = 1; + if (strcmp(optarg, "-") == 0) + fmodelin = stdin; + else if ((fmodelin = fopen(optarg, "rb")) == NULL) { + fprintf(stderr, "Error opening modelin file: %s: %s\n", optarg, + strerror(errno)); + exit(1); + } + fprintf(stderr, "each model record is %d bytes\n", + (int)sizeof(MODEL)); + } else if (strcmp(long_options[option_index].name, "rate") == 0) { + if (strcmp(optarg, "3200") == 0) { + lpc_model = 1; + scalar_quant_Wo_e = 1; + lspd = 1; + phase0 = 1; + postfilt = 1; + decimate = 1; + lpcpf = 1; + } else if (strcmp(optarg, "2400") == 0) { + lpc_model = 1; + vector_quant_Wo_e = 1; + lsp = 1; + phase0 = 1; + postfilt = 1; + decimate = 2; + lpcpf = 1; + } else if (strcmp(optarg, "1400") == 0) { + lpc_model = 1; + vector_quant_Wo_e = 1; + lsp = 1; + phase0 = 1; + postfilt = 1; + decimate = 4; + lpcpf = 1; + } else if (strcmp(optarg, "1300") == 0) { + lpc_model = 1; + scalar_quant_Wo_e = 1; + lsp = 1; + phase0 = 1; + postfilt = 1; + decimate = 4; + lpcpf = 1; + } else if (strcmp(optarg, "1200") == 0) { + lpc_model = 1; + scalar_quant_Wo_e = 1; + lspjmv = 1; + phase0 = 1; + postfilt = 1; + decimate = 4; + lpcpf = 1; + } else { + fprintf(stderr, + "Error: invalid output rate (3200|2400|1400|1200) %s\n", + optarg); + exit(1); + } } - } + break; - /* Input file */ + case 'h': + print_help(long_options, num_opts, argv); + break; + + case 'o': + if (strcmp(optarg, "-") == 0) + fout = stdout; + else if ((fout = fopen(optarg, "wb")) == NULL) { + fprintf(stderr, "Error opening output speech file: %s: %s.\n", optarg, + strerror(errno)); + exit(1); + } + strcpy(out_file, optarg); + break; - FILE *fin; /* input speech file */ - if (strcmp(argv[optind], "-") == 0) fin = stdin; - else if ((fin = fopen(argv[optind],"rb")) == NULL) { - fprintf(stderr, "Error opening input speech file: %s: %s.\n", - argv[optind], strerror(errno)); - exit(1); + default: + /* This will never be reached */ + break; } - - C2CONST c2const = c2const_create(Fs, framelength_s); - int n_samp = c2const.n_samp; - int m_pitch = c2const.m_pitch; - - short buf[N_SAMP]; /* input/output buffer */ - float buf_float[N_SAMP]; - float Sn[m_pitch]; /* float input speech samples */ - float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */ - COMP Sw[FFT_ENC]; /* DFT of Sn[] */ - codec2_fft_cfg fft_fwd_cfg; - codec2_fftr_cfg fftr_fwd_cfg; - codec2_fftr_cfg fftr_inv_cfg; - float w[m_pitch]; /* time domain hamming window */ - float W[FFT_ENC]; /* DFT of w[] */ - MODEL model; - float Pn[2*N_SAMP]; /* trapezoidal synthesis window */ - float Sn_[2*N_SAMP]; /* synthesised speech */ - int i,m; /* loop variable */ - int frames; - float prev_f0; - float pitch; - float snr; - float sum_snr; - - float pre_mem = 0.0, de_mem = 0.0; - float ak[1+order]; - // COMP Sw_[FFT_ENC]; - // COMP Ew[FFT_ENC]; - - float ex_phase[MAX_AMP+1]; - - float bg_est = 0.0; - - - MODEL prev_model; - float lsps[order]; - float e, prev_e; - int lsp_indexes[order]; - float lsps_[order]; - float Woe_[2]; - - float lsps_dec[4][order], e_dec[4], weight, weight_inc, ak_dec[4][order]; - MODEL model_dec[4], prev_model_dec; - float prev_lsps_dec[order], prev_e_dec; - - void *nlp_states; - float hpf_states[2]; - #if 0 + } + + /* Input file */ + + FILE *fin; /* input speech file */ + if (strcmp(argv[optind], "-") == 0) + fin = stdin; + else if ((fin = fopen(argv[optind], "rb")) == NULL) { + fprintf(stderr, "Error opening input speech file: %s: %s.\n", argv[optind], + strerror(errno)); + exit(1); + } + + C2CONST c2const = c2const_create(Fs, framelength_s); + int n_samp = c2const.n_samp; + int m_pitch = c2const.m_pitch; + + short buf[N_SAMP]; /* input/output buffer */ + float buf_float[N_SAMP]; + float Sn[m_pitch]; /* float input speech samples */ + float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */ + COMP Sw[FFT_ENC]; /* DFT of Sn[] */ + codec2_fft_cfg fft_fwd_cfg; + codec2_fftr_cfg fftr_fwd_cfg; + codec2_fftr_cfg fftr_inv_cfg; + float w[m_pitch]; /* time domain hamming window */ + float W[FFT_ENC]; /* DFT of w[] */ + MODEL model; + float Pn[2 * N_SAMP]; /* trapezoidal synthesis window */ + float Sn_[2 * N_SAMP]; /* synthesised speech */ + int i, m; /* loop variable */ + int frames; + float prev_f0; + float pitch; + float snr; + float sum_snr; + + float pre_mem = 0.0, de_mem = 0.0; + float ak[1 + order]; + // COMP Sw_[FFT_ENC]; + // COMP Ew[FFT_ENC]; + + float ex_phase[MAX_AMP + 1]; + + float bg_est = 0.0; + + MODEL prev_model; + float lsps[order]; + float e, prev_e; + int lsp_indexes[order]; + float lsps_[order]; + float Woe_[2]; + + float lsps_dec[4][order], e_dec[4], weight, weight_inc, ak_dec[4][order]; + MODEL model_dec[4], prev_model_dec; + float prev_lsps_dec[order], prev_e_dec; + + void *nlp_states; + float hpf_states[2]; +#if 0 struct PEXP *pexp = NULL; struct AEXP *aexp = NULL; - #endif - float bpf_buf[BPF_N+N_SAMP]; - - COMP Aw[FFT_ENC]; - COMP H[MAX_AMP]; - - float sd_sum = 0.0; int sd_frames = 0; - - for(i=0; i<m_pitch; i++) { - Sn[i] = 1.0; - Sn_pre[i] = 1.0; +#endif + float bpf_buf[BPF_N + N_SAMP]; + + COMP Aw[FFT_ENC]; + COMP H[MAX_AMP]; + + float sd_sum = 0.0; + int sd_frames = 0; + + for (i = 0; i < m_pitch; i++) { + Sn[i] = 1.0; + Sn_pre[i] = 1.0; + } + for (i = 0; i < 2 * N_SAMP; i++) Sn_[i] = 0; + + prev_f0 = 1 / P_MAX_S; + + prev_model.Wo = c2const.Wo_max; + prev_model.L = floor(PI / prev_model.Wo); + for (i = 1; i <= prev_model.L; i++) { + prev_model.A[i] = 0.0; + prev_model.phi[i] = 0.0; + } + for (i = 1; i <= MAX_AMP; i++) { + // ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX; + ex_phase[i] = 0.0; + } + e = prev_e = 1; + hpf_states[0] = hpf_states[1] = 0.0; + + nlp_states = nlp_create(&c2const); + + ex_phase[0] = 0; + Woe_[0] = Woe_[1] = 1.0; + + /* Initialise ------------------------------------------------------------*/ + + fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, + NULL); /* fwd FFT,used in several places */ + fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, + NULL); /* fwd FFT,used in several places */ + fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, + NULL); /* inverse FFT, used just for synth */ + codec2_fft_cfg phase_fft_fwd_cfg = + codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); + codec2_fft_cfg phase_fft_inv_cfg = + codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); + + make_analysis_window(&c2const, fft_fwd_cfg, w, W); + make_synthesis_window(&c2const, Pn); + + if (bpfb_en) bpf_en = 1; + if (bpf_en) { + for (i = 0; i < BPF_N; i++) bpf_buf[i] = 0.0; + } + + for (i = 0; i < LPC_ORD; i++) { + prev_lsps_dec[i] = i * PI / (LPC_ORD + 1); + } + prev_e_dec = 1; + for (m = 1; m <= MAX_AMP; m++) prev_model_dec.A[m] = 0.0; + prev_model_dec.Wo = c2const.Wo_min; + prev_model_dec.L = PI / prev_model_dec.Wo; + prev_model_dec.voiced = 0; + + /* mel resampling experiments */ + + float rate_K_sample_freqs_kHz[K]; + float se = 0.0; + int nse = 0; + if (rateK) { + mel_sample_freqs_kHz(rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), + ftomel(3700.0)); + } + float rate_K_vec_delay[rate_K_dec + 1][K]; + float rate_K_vec_delay_[rate_K_dec + 1][K]; + MODEL rate_K_model_delay[rate_K_dec + 1]; + for (int d = 0; d <= rate_K_dec; d++) { + for (int k = 0; k < K; k++) { + rate_K_vec_delay[d][k] = 0; + rate_K_vec_delay_[d][k] = 0; } - for(i=0; i<2*N_SAMP; i++) - Sn_[i] = 0; + for (m = 1; m <= MAX_AMP; m++) rate_K_model_delay[d].A[m] = 0.0; + rate_K_model_delay[d].Wo = c2const.Wo_min; + rate_K_model_delay[d].L = M_PI / prev_model_dec.Wo; + rate_K_model_delay[d].voiced = 0; + } + float eq[K]; + for (int k = 0; k < K; k++) eq[k] = 0; - prev_f0 = 1/P_MAX_S; + /*----------------------------------------------------------------* \ - prev_model.Wo = c2const.Wo_max; - prev_model.L = floor(PI/prev_model.Wo); - for(i=1; i<=prev_model.L; i++) { - prev_model.A[i] = 0.0; - prev_model.phi[i] = 0.0; - } - for(i=1; i<=MAX_AMP; i++) { - //ex_phase[i] = (PI/3)*(float)rand()/RAND_MAX; - ex_phase[i] = 0.0; - } - e = prev_e = 1; - hpf_states[0] = hpf_states[1] = 0.0; - - nlp_states = nlp_create(&c2const); + Main Loop - ex_phase[0] = 0; - Woe_[0] = Woe_[1] = 1.0; + \*----------------------------------------------------------------*/ - /* Initialise ------------------------------------------------------------*/ + frames = 0; + sum_snr = 0; + while (fread(buf, sizeof(short), N_SAMP, fin)) { + frames++; - fft_fwd_cfg = codec2_fft_alloc(FFT_ENC, 0, NULL, NULL); /* fwd FFT,used in several places */ - fftr_fwd_cfg = codec2_fftr_alloc(FFT_ENC, 0, NULL, NULL); /* fwd FFT,used in several places */ - fftr_inv_cfg = codec2_fftr_alloc(FFT_DEC, 1, NULL, NULL); /* inverse FFT, used just for synth */ - codec2_fft_cfg phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); - codec2_fft_cfg phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); + for (i = 0; i < N_SAMP; i++) buf_float[i] = buf[i]; - make_analysis_window(&c2const, fft_fwd_cfg, w, W); - make_synthesis_window(&c2const, Pn); + /* optionally filter input speech */ - if (bpfb_en) - bpf_en = 1; - if (bpf_en) { - for(i=0; i<BPF_N; i++) - bpf_buf[i] = 0.0; + if (prede) { + pre_emp(Sn_pre, buf_float, &pre_mem, N_SAMP); + for (i = 0; i < N_SAMP; i++) buf_float[i] = Sn_pre[i]; } - for(i=0; i<LPC_ORD; i++) { - prev_lsps_dec[i] = i*PI/(LPC_ORD+1); + if (bpf_en) { + /* filter input speech to create buf_float_bpf[], this is fed to the + LPC modelling. Unfiltered speech in in buf_float[], which is + delayed to match that of the BPF */ + + /* BPF speech */ + + for (i = 0; i < BPF_N; i++) bpf_buf[i] = bpf_buf[N_SAMP + i]; + for (i = 0; i < N_SAMP; i++) bpf_buf[BPF_N + i] = buf_float[i]; + if (bpfb_en) + inverse_filter(&bpf_buf[BPF_N], bpfb, N_SAMP, buf_float, BPF_N); + else + inverse_filter(&bpf_buf[BPF_N], bpf, N_SAMP, buf_float, BPF_N); } - prev_e_dec = 1; - for(m=1; m<=MAX_AMP; m++) - prev_model_dec.A[m] = 0.0; - prev_model_dec.Wo = c2const.Wo_min; - prev_model_dec.L = PI/prev_model_dec.Wo; - prev_model_dec.voiced = 0; - /* mel resampling experiments */ + /* shift buffer of input samples, and insert new samples */ - float rate_K_sample_freqs_kHz[K]; float se = 0.0; int nse = 0; - if (rateK) { - mel_sample_freqs_kHz(rate_K_sample_freqs_kHz, NEWAMP1_K, ftomel(200.0), ftomel(3700.0) ); + for (i = 0; i < m_pitch - N_SAMP; i++) { + Sn[i] = Sn[i + N_SAMP]; } - float rate_K_vec_delay[rate_K_dec+1][K]; - float rate_K_vec_delay_[rate_K_dec+1][K]; - MODEL rate_K_model_delay[rate_K_dec+1]; - for (int d=0; d<=rate_K_dec; d++) { - for(int k=0; k<K; k++) { - rate_K_vec_delay[d][k] = 0; - rate_K_vec_delay_[d][k] = 0; - } - for(m=1; m<=MAX_AMP; m++) - rate_K_model_delay[d].A[m] = 0.0; - rate_K_model_delay[d].Wo = c2const.Wo_min; - rate_K_model_delay[d].L = M_PI/prev_model_dec.Wo; - rate_K_model_delay[d].voiced = 0; + for (i = 0; i < N_SAMP; i++) { + Sn[i + m_pitch - N_SAMP] = buf_float[i]; } - float eq[K]; - for(int k=0; k<K; k++) eq[k] = 0; - - /*----------------------------------------------------------------* \ - - Main Loop - - \*----------------------------------------------------------------*/ - - frames = 0; - sum_snr = 0; - while(fread(buf,sizeof(short),N_SAMP,fin)) { - frames++; - - for(i=0; i<N_SAMP; i++) - buf_float[i] = buf[i]; - - /* optionally filter input speech */ - - if (prede) { - pre_emp(Sn_pre, buf_float, &pre_mem, N_SAMP); - for(i=0; i<N_SAMP; i++) - buf_float[i] = Sn_pre[i]; - } - - if (bpf_en) { - /* filter input speech to create buf_float_bpf[], this is fed to the - LPC modelling. Unfiltered speech in in buf_float[], which is - delayed to match that of the BPF */ - - /* BPF speech */ - - for(i=0; i<BPF_N; i++) - bpf_buf[i] = bpf_buf[N_SAMP+i]; - for(i=0; i<N_SAMP; i++) - bpf_buf[BPF_N+i] = buf_float[i]; - if (bpfb_en) - inverse_filter(&bpf_buf[BPF_N], bpfb, N_SAMP, buf_float, BPF_N); - else - inverse_filter(&bpf_buf[BPF_N], bpf, N_SAMP, buf_float, BPF_N); - } - - /* shift buffer of input samples, and insert new samples */ - for(i=0; i<m_pitch-N_SAMP; i++) { - Sn[i] = Sn[i+N_SAMP]; - } - for(i=0; i<N_SAMP; i++) { - Sn[i+m_pitch-N_SAMP] = buf_float[i]; - } - - /*------------------------------------------------------------*\ - - Estimate Sinusoidal Model Parameters - - \*------------------------------------------------------------*/ - - nlp(nlp_states, Sn, N_SAMP, &pitch, Sw, W, &prev_f0); - model.Wo = TWO_PI/pitch; - - dft_speech(&c2const, fft_fwd_cfg, Sw, Sn, w); - two_stage_pitch_refinement(&c2const, &model, Sw); - estimate_amplitudes(&model, Sw, W, 1); - - #ifdef DUMP - dump_Sn(m_pitch, Sn); dump_Sw(Sw); dump_model(&model); - #endif + /*------------------------------------------------------------*\ - /* speech centred on analysis frame for Deep Learning work */ + Estimate Sinusoidal Model Parameters - if (ten_ms_centre) { - int n_10_ms = Fs*0.01; - int n_5_ms = Fs*0.005; - short buf[n_10_ms]; - for(i=0; i<n_10_ms; i++) { - buf[i] = Sn[m_pitch/2-n_5_ms+i]; - } - fwrite(buf, n_10_ms, sizeof(short), ften_ms_centre); - } + \*------------------------------------------------------------*/ - if (hi) { - int m; - for(m=1; m<model.L/2; m++) - model.A[m] = 0.0; - for(m=3*model.L/4; m<=model.L; m++) - model.A[m] = 0.0; - } + nlp(nlp_states, Sn, N_SAMP, &pitch, Sw, W, &prev_f0); + model.Wo = TWO_PI / pitch; - /*------------------------------------------------------------*\ + dft_speech(&c2const, fft_fwd_cfg, Sw, Sn, w); + two_stage_pitch_refinement(&c2const, &model, Sw); + estimate_amplitudes(&model, Sw, W, 1); - Zero-phase modelling +#ifdef DUMP + dump_Sn(m_pitch, Sn); + dump_Sw(Sw); + dump_model(&model); +#endif - \*------------------------------------------------------------*/ + /* speech centred on analysis frame for Deep Learning work */ - /* estimate voicing - do this all the time so model.voicing - * is set, useful for machine learning work */ - snr = est_voicing_mbe(&c2const, &model, Sw, W); - - if (phase0) { - #ifdef DUMP - dump_phase(&model.phi[0], model.L); - #endif + if (ten_ms_centre) { + int n_10_ms = Fs * 0.01; + int n_5_ms = Fs * 0.005; + short buf[n_10_ms]; + for (i = 0; i < n_10_ms; i++) { + buf[i] = Sn[m_pitch / 2 - n_5_ms + i]; + } + fwrite(buf, n_10_ms, sizeof(short), ften_ms_centre); + } - if (dump_pitch_e) - fprintf(fjmv, "%f %f %d ", model.Wo, snr, model.voiced); + if (hi) { + int m; + for (m = 1; m < model.L / 2; m++) model.A[m] = 0.0; + for (m = 3 * model.L / 4; m <= model.L; m++) model.A[m] = 0.0; + } - #ifdef DUMP - dump_snr(snr); - #endif + /*------------------------------------------------------------*\ - /* just to make sure we are not cheating - kill all phases */ + Zero-phase modelling - for(i=0; i<=MAX_AMP; i++) - model.phi[i] = 0; + \*------------------------------------------------------------*/ - if (hand_voicing) { - int ret = fscanf(fvoicing,"%d\n",&model.voiced); - assert(ret == 1); - } - } + /* estimate voicing - do this all the time so model.voicing + * is set, useful for machine learning work */ + snr = est_voicing_mbe(&c2const, &model, Sw, W); - /*------------------------------------------------------------*\ + if (phase0) { +#ifdef DUMP + dump_phase(&model.phi[0], model.L); +#endif - LPC model amplitudes and LSP quantisation + if (dump_pitch_e) fprintf(fjmv, "%f %f %d ", model.Wo, snr, model.voiced); - \*------------------------------------------------------------*/ +#ifdef DUMP + dump_snr(snr); +#endif - if (lpc_model) { - float ak_[LPC_ORD+1]; + /* just to make sure we are not cheating - kill all phases */ - e = speech_to_uq_lsps(lsps, ak, Sn, w, m_pitch, order); - for(i=0; i<order; i++) - lsps_[i] = lsps[i]; + for (i = 0; i <= MAX_AMP; i++) model.phi[i] = 0; - #ifdef DUMP - dump_ak(ak, order); - dump_E(e); - #endif + if (hand_voicing) { + int ret = fscanf(fvoicing, "%d\n", &model.voiced); + assert(ret == 1); + } + } - if (dump_pitch_e) - fprintf(fjmv, "%f\n", e); + /*------------------------------------------------------------*\ - #ifdef DUMP - dump_lsp(lsps); - #endif + LPC model amplitudes and LSP quantisation - /* various LSP quantisation schemes */ + \*------------------------------------------------------------*/ - if (lsp) { - encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); - decode_lsps_scalar(lsps_, lsp_indexes, LPC_ORD); - bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0); - lsp_to_lpc(lsps_, ak_, LPC_ORD); - } + if (lpc_model) { + float ak_[LPC_ORD + 1]; + + e = speech_to_uq_lsps(lsps, ak, Sn, w, m_pitch, order); + for (i = 0; i < order; i++) lsps_[i] = lsps[i]; + +#ifdef DUMP + dump_ak(ak, order); + dump_E(e); +#endif + + if (dump_pitch_e) fprintf(fjmv, "%f\n", e); + +#ifdef DUMP + dump_lsp(lsps); +#endif + + /* various LSP quantisation schemes */ + + if (lsp) { + encode_lsps_scalar(lsp_indexes, lsps, LPC_ORD); + decode_lsps_scalar(lsps_, lsp_indexes, LPC_ORD); + bw_expand_lsps(lsps_, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_, ak_, LPC_ORD); + } + + if (lspd) { + encode_lspds_scalar(lsp_indexes, lsps, LPC_ORD); + decode_lspds_scalar(lsps_, lsp_indexes, LPC_ORD); + lsp_to_lpc(lsps_, ak_, LPC_ORD); + } + + if (lspjmv) { + /* Jean-Marc's multi-stage, split VQ */ + lspjmv_quantise(lsps, lsps_, LPC_ORD); + { + float lsps_bw[LPC_ORD]; + memcpy(lsps_bw, lsps_, sizeof(float) * order); + bw_expand_lsps(lsps_bw, LPC_ORD, 50.0, 100.0); + lsp_to_lpc(lsps_bw, ak_, LPC_ORD); + } + } + + if (lsp || lspd || lspjmv) { + sd_sum += spectral_dist(ak, ak_, LPC_ORD, fft_fwd_cfg, FFT_ENC); + sd_frames++; + } + + memcpy(ak, ak_, (LPC_ORD + 1) * sizeof(float)); + + if (scalar_quant_Wo_e) { + e = decode_energy(encode_energy(e, E_BITS), E_BITS); + model.Wo = decode_Wo(&c2const, encode_Wo(&c2const, model.Wo, WO_BITS), + WO_BITS); + model.L = PI / model.Wo; /* if we quantise Wo re-compute L */ + } + + if (scalar_quant_Wo_e_low) { + int ind; + e = decode_energy(ind = encode_energy(e, 3), 3); + model.Wo = + decode_log_Wo(&c2const, encode_log_Wo(&c2const, model.Wo, 5), 5); + model.L = PI / model.Wo; /* if we quantise Wo re-compute L */ + } + + if (vector_quant_Wo_e) { + /* JVM's experimental joint Wo & LPC energy quantiser */ + quantise_WoE(&c2const, &model, &e, Woe_); + } + } - if (lspd) { - encode_lspds_scalar(lsp_indexes, lsps, LPC_ORD); - decode_lspds_scalar(lsps_, lsp_indexes, LPC_ORD); - lsp_to_lpc(lsps_, ak_, LPC_ORD); - } - - if (lspjmv) { - /* Jean-Marc's multi-stage, split VQ */ - lspjmv_quantise(lsps, lsps_, LPC_ORD); - { - float lsps_bw[LPC_ORD]; - memcpy(lsps_bw, lsps_, sizeof(float)*order); - bw_expand_lsps(lsps_bw, LPC_ORD, 50.0, 100.0); - lsp_to_lpc(lsps_bw, ak_, LPC_ORD); - } - } - - if (lsp || lspd || lspjmv) { - sd_sum += spectral_dist(ak, ak_, LPC_ORD, fft_fwd_cfg, FFT_ENC); - sd_frames ++; - } + if (amread) { + int ret = fread(model.A, sizeof(float), MAX_AMP, fam); + assert(ret == MAX_AMP); + } - memcpy(ak, ak_, (LPC_ORD+1)*sizeof(float)); + if (Woread) { + int ret = fread(&model.Wo, sizeof(float), 1, fWo); + model.L = floor(PI / model.Wo); + assert(ret == 1); + } - if (scalar_quant_Wo_e) { - e = decode_energy(encode_energy(e, E_BITS), E_BITS); - model.Wo = decode_Wo(&c2const, encode_Wo(&c2const, model.Wo, WO_BITS), WO_BITS); - model.L = PI/model.Wo; /* if we quantise Wo re-compute L */ - } + /* dump features for Deep learning, placed here so we can get quantised + * features */ + + if (lspEWov) { + /* order LSPs - energy - Wo - voicing flag - order LPCs */ + if (lsp) + fwrite(lsps_, order, sizeof(float), flspEWov); + else + fwrite(lsps, order, sizeof(float), flspEWov); + + fwrite(&e, 1, sizeof(float), flspEWov); + fwrite(&model.Wo, 1, sizeof(float), flspEWov); + float voiced_float = model.voiced; + fwrite(&voiced_float, 1, sizeof(float), flspEWov); + fwrite(&ak[1], order, sizeof(float), flspEWov); + } - if (scalar_quant_Wo_e_low) { - int ind; - e = decode_energy(ind = encode_energy(e, 3), 3); - model.Wo = decode_log_Wo(&c2const, encode_log_Wo(&c2const, model.Wo, 5), 5); - model.L = PI/model.Wo; /* if we quantise Wo re-compute L */ - } + /* LPCNet type mel spaced band ML data */ + float bands_mean = 0.0; + if (fbands) { + float bandE[LPCNET_FREQ_MAX_BANDS]; + float freqkHz[LPCNET_FREQ_MAX_BANDS]; + int nbands = lpcnet_compute_band_energy(bandE, freqkHz, Sw, Fs, FFT_ENC); + for (int i = 0; i < nbands; i++) bands_mean += bandE[i]; + bands_mean /= nbands; + // fprintf(stderr, "bands_mean: %f bands_lower %f\n", bands_mean, + // bands_lower); + if (bands_mean > bands_lower) + assert(fwrite(bandE, sizeof(float), nbands, fbands) == nbands); + // optionally reconstruct [Am} by linear interpolation of band energies, + // this doesn't sound very Good + if (bands_resample) + resample_rate_L(&c2const, &model, &bandE[1], &freqkHz[1], nbands - 2); + } - if (vector_quant_Wo_e) { - /* JVM's experimental joint Wo & LPC energy quantiser */ - quantise_WoE(&c2const, &model, &e, Woe_); - } + /*------------------------------------------------------------*\ - } + Optional newamp1 simulation, as used in 700C - if (amread) { - int ret = fread(model.A, sizeof(float), MAX_AMP, fam); - assert(ret == MAX_AMP); - } + \*------------------------------------------------------------*/ - if (Woread) { - int ret = fread(&model.Wo, sizeof(float), 1, fWo); - model.L = floor(PI/model.Wo); - assert(ret == 1); + if (rateK) { + float rate_K_vec[K]; + resample_const_rate_f(&c2const, &model, rate_K_vec, + rate_K_sample_freqs_kHz, K); + + if (frateK != NULL) + assert(fwrite(rate_K_vec, sizeof(float), K, frateK) == K); + + if (frateKin != NULL) { + assert(fread(rate_K_vec, sizeof(float), K, frateKin) == K); + /* apply newamp1 postfilter - this helped male samples with VQVAE work + */ + float sum = 0.0; + for (int k = 0; k < K; k++) sum += rate_K_vec[k]; + float mean = sum / K; + float rate_K_vec_no_mean[K]; + for (int k = 0; k < K; k++) + rate_K_vec_no_mean[k] = rate_K_vec[k] - mean; + post_filter_newamp1(rate_K_vec_no_mean, rate_K_sample_freqs_kHz, K, + 1.5); + for (int k = 0; k < K; k++) + rate_K_vec[k] = rate_K_vec_no_mean[k] + mean; + } + + float rate_K_vec_[K]; + if (newamp1vq) { + /* remove mean */ + float sum = 0.0; + for (int k = 0; k < K; k++) sum += rate_K_vec[k]; + float mean = sum / K; + float rate_K_vec_no_mean[K]; + for (int k = 0; k < K; k++) + rate_K_vec_no_mean[k] = rate_K_vec[k] - mean; + + newamp1_eq(rate_K_vec_no_mean, eq, K, 1); + + /* two stage VQ */ + float rate_K_vec_no_mean_[K]; + int indexes[2]; + rate_K_mbest_encode(indexes, rate_K_vec_no_mean, rate_K_vec_no_mean_, K, + NEWAMP1_VQ_MBEST_DEPTH); + for (int k = 0; k < K; k++) + rate_K_vec_[k] = rate_K_vec_no_mean_[k] + mean; + + /* running sum of squared error for variance calculation */ + for (int k = 0; k < K; k++) + se += pow(rate_K_vec_no_mean[k] - rate_K_vec_no_mean_[k], 2.0); + nse += K; + } else { + for (int k = 0; k < K; k++) rate_K_vec_[k] = rate_K_vec[k]; + } + + if (frateKWov != NULL) { + /* We use standard nb_features=55 feature records for compatibility with + * train_lpcnet.py */ + float features[55] = {0}; + /* just using 18/20 for compatibility with LPCNet, coarse scaling for NN + * input */ + for (int i = 0; i < 18; i++) features[i] = (rate_K_vec_[i] - 30) / 40; + // keep in range of 40 ... 255 for pitch embedding + int pitch_index = 21 + 2.0 * M_PI / model.Wo; + features[36] = 0.02 * (pitch_index - 100); + // features[36] = (model.Wo - c2const.Wo_min)/(c2const.Wo_max - + // c2const.Wo_min) - 0.5; + features[37] = model.voiced; + if (first) features[18] = -0.9; + if (lpc_model) { + MODEL model_; + model_.Wo = model.Wo; + model_.L = model.L; + model_.voiced = model.voiced; + float Rk[order + 1], ak[order + 1]; + resample_rate_L(&c2const, &model_, rate_K_vec_, + rate_K_sample_freqs_kHz, K); + determine_autoc(&c2const, Rk, order, &model_, NEWAMP1_PHASE_NFFT, + phase_fft_fwd_cfg, phase_fft_inv_cfg); + /* -40 dB noise floor and Lag windowing from LPCNet/freq.c - helps + reduce large spikes in spectrum when LPC analysis loses it. */ + Rk[0] += Rk[0] * 1e-4 + 320 / 12 / 38.; + for (i = 1; i < order + 1; i++) Rk[i] *= (1 - 6e-5 * i * i); + levinson_durbin(Rk, ak, order); + + for (int i = 0; i < order; i++) features[18 + i] = ak[i + 1]; } - - /* dump features for Deep learning, placed here so we can get quantised features */ - - if (lspEWov) { - /* order LSPs - energy - Wo - voicing flag - order LPCs */ - if (lsp) - fwrite(lsps_, order, sizeof(float), flspEWov); - else - fwrite(lsps, order, sizeof(float), flspEWov); - - fwrite(&e, 1, sizeof(float), flspEWov); - fwrite(&model.Wo, 1, sizeof(float), flspEWov); - float voiced_float = model.voiced; - fwrite(&voiced_float, 1, sizeof(float), flspEWov); - fwrite(&ak[1], order, sizeof(float), flspEWov); + fwrite(features, 55, sizeof(float), frateKWov); + } + + if (rate_K_dec) { + // update delay lines + for (int d = 0; d < rate_K_dec; d++) { + rate_K_model_delay[d] = rate_K_model_delay[d + 1]; + memcpy(&rate_K_vec_delay[d][0], &rate_K_vec_delay[d + 1][0], + sizeof(float) * K); } - - /* LPCNet type mel spaced band ML data */ - float bands_mean = 0.0; - if (fbands) { - float bandE[LPCNET_FREQ_MAX_BANDS]; - float freqkHz[LPCNET_FREQ_MAX_BANDS]; - int nbands = lpcnet_compute_band_energy(bandE, freqkHz, Sw, Fs, FFT_ENC); - for(int i=0; i<nbands; i++) - bands_mean += bandE[i]; - bands_mean /= nbands; - //fprintf(stderr, "bands_mean: %f bands_lower %f\n", bands_mean, bands_lower); - if (bands_mean > bands_lower) - assert(fwrite(bandE, sizeof(float), nbands, fbands) == nbands); - // optionally reconstruct [Am} by linear interpolation of band energies, - // this doesn't sound very Good - if (bands_resample) - resample_rate_L(&c2const, &model, &bandE[1], &freqkHz[1], nbands-2); - } - - /*------------------------------------------------------------*\ - - Optional newamp1 simulation, as used in 700C - - \*------------------------------------------------------------*/ - - if (rateK) { - float rate_K_vec[K]; - resample_const_rate_f(&c2const, &model, rate_K_vec, rate_K_sample_freqs_kHz, K); - - if (frateK != NULL) - assert(fwrite(rate_K_vec, sizeof(float), K, frateK) == K); - - if (frateKin != NULL) { - assert(fread(rate_K_vec, sizeof(float), K, frateKin) == K); - /* apply newamp1 postfilter - this helped male samples with VQVAE work */ - float sum = 0.0; - for(int k=0; k<K; k++) - sum += rate_K_vec[k]; - float mean = sum/K; - float rate_K_vec_no_mean[K]; - for(int k=0; k<K; k++) - rate_K_vec_no_mean[k] = rate_K_vec[k] - mean; - post_filter_newamp1(rate_K_vec_no_mean, rate_K_sample_freqs_kHz, K, 1.5); - for(int k=0; k<K; k++) - rate_K_vec[k] = rate_K_vec_no_mean[k] + mean; - } - - float rate_K_vec_[K]; - if (newamp1vq) { - /* remove mean */ - float sum = 0.0; - for(int k=0; k<K; k++) - sum += rate_K_vec[k]; - float mean = sum/K; - float rate_K_vec_no_mean[K]; - for(int k=0; k<K; k++) - rate_K_vec_no_mean[k] = rate_K_vec[k] - mean; - - newamp1_eq(rate_K_vec_no_mean, eq, K, 1); - - /* two stage VQ */ - float rate_K_vec_no_mean_[K]; int indexes[2]; - rate_K_mbest_encode(indexes, rate_K_vec_no_mean, rate_K_vec_no_mean_, K, NEWAMP1_VQ_MBEST_DEPTH); - for(int k=0; k<K; k++) - rate_K_vec_[k] = rate_K_vec_no_mean_[k] + mean; - - /* running sum of squared error for variance calculation */ - for(int k=0; k<K; k++) - se += pow(rate_K_vec_no_mean[k]-rate_K_vec_no_mean_[k],2.0); - nse += K; - } - else { - for(int k=0; k<K; k++) - rate_K_vec_[k] = rate_K_vec[k]; + rate_K_model_delay[rate_K_dec] = model; + memcpy(&rate_K_vec_delay[rate_K_dec][0], rate_K_vec_, + sizeof(float) * K); + + if ((frames % rate_K_dec) == 0) { + // every rate_K_dec frames, calculate interpolated output values + if (perframe) { + // calculate interpolation coeff c for each frame + float *A = &rate_K_vec_delay[0][0]; + float *B = &rate_K_vec_delay[rate_K_dec][0]; + for (int d = 0; d <= rate_K_dec; d++) { + float *T = &rate_K_vec_delay[d][0]; + float num = 0.0, den = 0.0; + for (int k = 0; k < K; k++) { + num += (B[k] - T[k]) * (A[k] - B[k]); + den += (A[k] - B[k]) * (A[k] - B[k]); + } + float c = -num / den; + for (int k = 0; k < K; k++) + rate_K_vec_delay_[d][k] = c * A[k] + (1.0 - c) * B[k]; } - - if (frateKWov != NULL) { - /* We use standard nb_features=55 feature records for compatibility with train_lpcnet.py */ - float features[55] = {0}; - /* just using 18/20 for compatibility with LPCNet, coarse scaling for NN input */ - for(int i=0; i<18; i++) - features[i] = (rate_K_vec_[i]-30)/40; - // keep in range of 40 ... 255 for pitch embedding - int pitch_index = 21 + 2.0*M_PI/model.Wo; - features[36] = 0.02*(pitch_index-100); - //features[36] = (model.Wo - c2const.Wo_min)/(c2const.Wo_max - c2const.Wo_min) - 0.5; - features[37] = model.voiced; - if (first) - features[18] = -0.9; - if (lpc_model) { - MODEL model_; - model_.Wo = model.Wo; - model_.L = model.L; - model_.voiced = model.voiced; - float Rk[order+1], ak[order+1]; - resample_rate_L(&c2const, &model_, rate_K_vec_, rate_K_sample_freqs_kHz, K); - determine_autoc(&c2const, Rk, order, &model_, NEWAMP1_PHASE_NFFT, phase_fft_fwd_cfg, phase_fft_inv_cfg); - /* -40 dB noise floor and Lag windowing from LPCNet/freq.c - helps reduce large spikes in spectrum when LPC - analysis loses it. */ - Rk[0] += Rk[0]*1e-4 + 320/12/38.; - for (i=1;i<order+1;i++) Rk[i] *= (1 - 6e-5*i*i); - levinson_durbin(Rk, ak, order); - - for(int i=0; i<order; i++) - features[18+i] = ak[i+1]; - } - fwrite(features, 55, sizeof(float), frateKWov); - } - - if (rate_K_dec) { - // update delay lines - for(int d=0; d<rate_K_dec; d++) { - rate_K_model_delay[d] = rate_K_model_delay[d+1]; - memcpy(&rate_K_vec_delay[d][0], &rate_K_vec_delay[d+1][0], sizeof(float)*K); - } - rate_K_model_delay[rate_K_dec] = model; - memcpy(&rate_K_vec_delay[rate_K_dec][0], rate_K_vec_, sizeof(float)*K); - - if ((frames % rate_K_dec) == 0) { - // every rate_K_dec frames, calculate interpolated output values - if (perframe) { - // calculate interpolation coeff c for each frame - float *A = &rate_K_vec_delay[0][0]; - float *B = &rate_K_vec_delay[rate_K_dec][0]; - for(int d=0; d<=rate_K_dec; d++) { - float *T = &rate_K_vec_delay[d][0]; - float num = 0.0, den = 0.0; - for(int k=0; k<K; k++) { - num += (B[k]-T[k])*(A[k]-B[k]); - den += (A[k]-B[k])*(A[k]-B[k]); - } - float c = -num/den; - for(int k=0; k<K; k++) - rate_K_vec_delay_[d][k] = c*A[k] + (1.0-c)*B[k]; - } - } - else { - // use linear interpolation - float c=0.0, inc = 1.0/rate_K_dec; - for(int d=0; d<=rate_K_dec; d++) { - for(int k=0; k<K; k++) - rate_K_vec_delay_[d][k] = (1.0-c)*rate_K_vec_delay[0][k] + c*rate_K_vec_delay[rate_K_dec][k]; - c += inc; - } - } - } else { - // otherwise just shift out frames we have already interpolated - for(int d=0; d<rate_K_dec; d++) { - memcpy(&rate_K_vec_delay_[d][0], &rate_K_vec_delay_[d+1][0], sizeof(float)*K); - } - } - - // output from delay line - model = rate_K_model_delay[0]; - for(int k=0; k<K; k++) - rate_K_vec_[k] = rate_K_vec_delay_[0][k]; + } else { + // use linear interpolation + float c = 0.0, inc = 1.0 / rate_K_dec; + for (int d = 0; d <= rate_K_dec; d++) { + for (int k = 0; k < K; k++) + rate_K_vec_delay_[d][k] = (1.0 - c) * rate_K_vec_delay[0][k] + + c * rate_K_vec_delay[rate_K_dec][k]; + c += inc; } - - resample_rate_L(&c2const, &model, rate_K_vec_, rate_K_sample_freqs_kHz, K); + } + } else { + // otherwise just shift out frames we have already interpolated + for (int d = 0; d < rate_K_dec; d++) { + memcpy(&rate_K_vec_delay_[d][0], &rate_K_vec_delay_[d + 1][0], + sizeof(float) * K); + } } - /*------------------------------------------------------------*\ - - Synthesise and optional decimation to 20 or 40ms frame rate + // output from delay line + model = rate_K_model_delay[0]; + for (int k = 0; k < K; k++) rate_K_vec_[k] = rate_K_vec_delay_[0][k]; + } - \*------------------------------------------------------------*/ - - /* - if decimate == 2, we interpolate frame n from frame n-1 and n+1 - if decimate == 4, we interpolate frames n, n+1, n+2, from frames n-1 and n+3 - - This is meant to give identical results to the implementations of various modes - in codec2.c - */ + resample_rate_L(&c2const, &model, rate_K_vec_, rate_K_sample_freqs_kHz, + K); + } - /* delay line to keep frame by frame voicing decisions */ + /*------------------------------------------------------------*\ + + Synthesise and optional decimation to 20 or 40ms frame rate + + \*------------------------------------------------------------*/ + + /* + if decimate == 2, we interpolate frame n from frame n-1 and n+1 + if decimate == 4, we interpolate frames n, n+1, n+2, from frames n-1 and + n+3 + + This is meant to give identical results to the implementations of various + modes in codec2.c + */ + + /* delay line to keep frame by frame voicing decisions */ + + for (i = 0; i < decimate - 1; i++) model_dec[i] = model_dec[i + 1]; + model_dec[decimate - 1] = model; + + if ((frames % decimate) == 0) { + for (i = 0; i < order; i++) lsps_dec[decimate - 1][i] = lsps_[i]; + e_dec[decimate - 1] = e; + model_dec[decimate - 1] = model; + + /* interpolate the model parameters */ + + weight_inc = 1.0 / decimate; + for (i = 0, weight = weight_inc; i < decimate - 1; + i++, weight += weight_inc) { + // model_dec[i].voiced = model_dec[decimate-1].voiced; + interpolate_lsp_ver2(&lsps_dec[i][0], prev_lsps_dec, + &lsps_dec[decimate - 1][0], weight, order); + interp_Wo2(&model_dec[i], &prev_model_dec, &model_dec[decimate - 1], + weight, c2const.Wo_min); + e_dec[i] = interp_energy2(prev_e_dec, e_dec[decimate - 1], weight); + } + + /* then recover spectral amplitudes and synthesise */ + + for (i = 0; i < decimate; i++) { + if (lpc_model) { + lsp_to_lpc(&lsps_dec[i][0], &ak_dec[i][0], order); + aks_to_M2(fftr_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i], + &snr, 0, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw); + apply_lpc_correction(&model_dec[i]); + sum_snr += snr; +#ifdef DUMP + dump_lsp_(&lsps_dec[i][0]); + dump_ak_(&ak_dec[i][0], order); + dump_quantised_model(&model_dec[i]); +#endif + } - for(i=0; i<decimate-1; i++) - model_dec[i] = model_dec[i+1]; - model_dec[decimate-1] = model; + if (modelin) { + int nrec; + nrec = fread(&model_dec[i], sizeof(MODEL), 1, fmodelin); + if (nrec != 1) { + fprintf(stderr, + "Warning - error reading model in record in frame %d - do " + "you have enough records in file?\n", + frames); + } + } - if ((frames % decimate) == 0) { - for(i=0; i<order; i++) - lsps_dec[decimate-1][i] = lsps_[i]; - e_dec[decimate-1] = e; - model_dec[decimate-1] = model; + if (phase0) { + /* optionally read in Aw, replacing values generated using LPC */ - /* interpolate the model parameters */ + if (awread) { + int ret = fread(Aw, sizeof(COMP), FFT_ENC, faw); + assert(ret == FFT_ENC); + } - weight_inc = 1.0/decimate; - for(i=0, weight=weight_inc; i<decimate-1; i++, weight += weight_inc) { - //model_dec[i].voiced = model_dec[decimate-1].voiced; - interpolate_lsp_ver2(&lsps_dec[i][0], prev_lsps_dec, &lsps_dec[decimate-1][0], weight, order); - interp_Wo2(&model_dec[i], &prev_model_dec, &model_dec[decimate-1], weight, c2const.Wo_min); - e_dec[i] = interp_energy2(prev_e_dec, e_dec[decimate-1],weight); - } + /* optionally read in Hm directly, bypassing sampling of Aw[] */ - /* then recover spectral amplitudes and synthesise */ - - for(i=0; i<decimate; i++) { - if (lpc_model) { - lsp_to_lpc(&lsps_dec[i][0], &ak_dec[i][0], order); - aks_to_M2(fftr_fwd_cfg, &ak_dec[i][0], order, &model_dec[i], e_dec[i], - &snr, 0, simlpcpf, lpcpf, 1, LPCPF_BETA, LPCPF_GAMMA, Aw); - apply_lpc_correction(&model_dec[i]); - sum_snr += snr; - #ifdef DUMP - dump_lsp_(&lsps_dec[i][0]); - dump_ak_(&ak_dec[i][0], order); - dump_quantised_model(&model_dec[i]); - #endif - } - - if (modelin) { - int nrec; - nrec = fread(&model_dec[i],sizeof(MODEL),1,fmodelin); - if (nrec != 1) { - fprintf(stderr, "Warning - error reading model in record in frame %d - do you have enough records in file?\n", frames); - } - } - - if (phase0) { - /* optionally read in Aw, replacing values generated using LPC */ - - if (awread) { - int ret = fread(Aw, sizeof(COMP), FFT_ENC, faw); - assert(ret == FFT_ENC); - } - - /* optionally read in Hm directly, bypassing sampling of Aw[] */ - - if (hmread) { - int ret = fread(H, sizeof(COMP), MAX_AMP, fhm); - assert(ret == MAX_AMP); - } else { - determine_phase(&c2const, H, &model_dec[i], NEWAMP1_PHASE_NFFT, phase_fft_fwd_cfg, phase_fft_inv_cfg); - } - phase_synth_zero_order(n_samp, &model_dec[i], ex_phase, H); - } - - if (postfilt) - postfilter(&model_dec[i], &bg_est); - synth_one_frame(n_samp, fftr_inv_cfg, buf, &model_dec[i], Sn_, Pn, prede, &de_mem, gain); - if (fout != NULL) - fwrite(buf,sizeof(short),N_SAMP,fout); - if (modelout) { - /* optionally don't write to filter out low energy frames */ - if (bands) { - if (bands_mean > bands_lower) - fwrite(&model_dec[i],sizeof(MODEL),1,fmodelout); - } - else - fwrite(&model_dec[i],sizeof(MODEL),1,fmodelout); - } - } + if (hmread) { + int ret = fread(H, sizeof(COMP), MAX_AMP, fhm); + assert(ret == MAX_AMP); + } else { + determine_phase(&c2const, H, &model_dec[i], NEWAMP1_PHASE_NFFT, + phase_fft_fwd_cfg, phase_fft_inv_cfg); + } + phase_synth_zero_order(n_samp, &model_dec[i], ex_phase, H); + } - /* update memories for next frame ----------------------------*/ + if (postfilt) postfilter(&model_dec[i], &bg_est); + synth_one_frame(n_samp, fftr_inv_cfg, buf, &model_dec[i], Sn_, Pn, + prede, &de_mem, gain); + if (fout != NULL) fwrite(buf, sizeof(short), N_SAMP, fout); + if (modelout) { + /* optionally don't write to filter out low energy frames */ + if (bands) { + if (bands_mean > bands_lower) + fwrite(&model_dec[i], sizeof(MODEL), 1, fmodelout); + } else + fwrite(&model_dec[i], sizeof(MODEL), 1, fmodelout); + } + } - prev_model_dec = model_dec[decimate-1]; - prev_e_dec = e_dec[decimate-1]; - for(i=0; i<LPC_ORD; i++) - prev_lsps_dec[i] = lsps_dec[decimate-1][i]; - } + /* update memories for next frame ----------------------------*/ + prev_model_dec = model_dec[decimate - 1]; + prev_e_dec = e_dec[decimate - 1]; + for (i = 0; i < LPC_ORD; i++) + prev_lsps_dec[i] = lsps_dec[decimate - 1][i]; } + } + + /*----------------------------------------------------------------*\ + + End Main Loop + + \*----------------------------------------------------------------*/ + + fclose(fin); + + if (fout != NULL) fclose(fout); + + if (lpc_model) { + fprintf(stderr, "LPC->{Am} SNR av: %5.2f dB over %d frames\n", + sum_snr / frames, frames); + if (lsp || lspd || lspjmv) + fprintf(stderr, "LSP quantiser SD: %5.2f dB*dB over %d frames\n", + sd_sum / sd_frames, sd_frames); + } + if (newamp1vq) { + fprintf(stderr, "var: %3.2f dB*dB\n", se / nse); + } +#ifdef DUMP + if (dump) dump_off(); +#endif + + if (hand_voicing) fclose(fvoicing); + + nlp_destroy(nlp_states); + + if (fam != NULL) fclose(fam); + if (fWo != NULL) fclose(fWo); + if (faw != NULL) fclose(faw); + if (fhm != NULL) fclose(fhm); + if (fjmv != NULL) fclose(fjmv); + if (flspEWov != NULL) fclose(flspEWov); + if (fphasenn != NULL) fclose(fphasenn); + if (frateK != NULL) fclose(frateK); + if (frateKin != NULL) fclose(frateKin); + if (ften_ms_centre != NULL) fclose(ften_ms_centre); + if (fmodelout != NULL) fclose(fmodelout); + if (fbands != NULL) fclose(fbands); + if (frateKWov != NULL) fclose(frateKWov); + + return 0; +} - /*----------------------------------------------------------------*\ - - End Main Loop - - \*----------------------------------------------------------------*/ - - fclose(fin); - - if (fout != NULL) - fclose(fout); - - if (lpc_model) { - fprintf(stderr, "LPC->{Am} SNR av: %5.2f dB over %d frames\n", sum_snr/frames, frames); - if (lsp || lspd || lspjmv) - fprintf(stderr, "LSP quantiser SD: %5.2f dB*dB over %d frames\n", sd_sum/sd_frames, sd_frames); - } - if (newamp1vq) { - fprintf(stderr, "var: %3.2f dB*dB\n", se/nse); - } - #ifdef DUMP - if (dump) - dump_off(); - #endif - - if (hand_voicing) - fclose(fvoicing); - - nlp_destroy(nlp_states); - - if (fam != NULL) fclose(fam); - if (fWo != NULL) fclose(fWo); - if (faw != NULL) fclose(faw); - if (fhm != NULL) fclose(fhm); - if (fjmv != NULL) fclose(fjmv); - if (flspEWov != NULL) fclose(flspEWov); - if (fphasenn != NULL) fclose(fphasenn); - if (frateK != NULL) fclose(frateK); - if (frateKin != NULL) fclose(frateKin); - if (ften_ms_centre != NULL) fclose(ften_ms_centre); - if (fmodelout != NULL) fclose(fmodelout); - if (fbands != NULL) fclose(fbands); - if (frateKWov != NULL) fclose(frateKWov); - - return 0; +void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], + MODEL *model, float Sn_[], float Pn[], int prede, + float *de_mem, float gain) { + int i; + + synthesise(n_samp, fftr_inv_cfg, Sn_, model, Pn, 1); + if (prede) de_emp(Sn_, Sn_, de_mem, n_samp); + + for (i = 0; i < n_samp; i++) { + Sn_[i] *= gain; + if (Sn_[i] > 32767.0) + buf[i] = 32767; + else if (Sn_[i] < -32767.0) + buf[i] = -32767; + else + buf[i] = Sn_[i]; + } } -void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[], - float Pn[], int prede, float *de_mem, float gain) -{ - int i; - - synthesise(n_samp, fftr_inv_cfg, Sn_, model, Pn, 1); - if (prede) - de_emp(Sn_, Sn_, de_mem, n_samp); - - for(i=0; i<n_samp; i++) { - Sn_[i] *= gain; - if (Sn_[i] > 32767.0) - buf[i] = 32767; - else if (Sn_[i] < -32767.0) - buf[i] = -32767; - else - buf[i] = Sn_[i]; +void print_help(const struct option *long_options, int num_opts, char *argv[]) { + int i; + char *option_parameters; + + fprintf(stderr, + "\nCodec2 - low bit rate speech codec - Simulation Program\n" + "\thttp://rowetel.com/codec2.html\n\n" + "usage: %s [OPTIONS] <InputFile>\n\n" + "Options:\n" + "\t-o <OutputFile>\n", + argv[0]); + for (i = 0; i < num_opts - 1; i++) { + if (long_options[i].has_arg == no_argument) { + option_parameters = ""; + } else if (strcmp("lpc", long_options[i].name) == 0) { + option_parameters = " <Order>"; + } else if (strcmp("dec", long_options[i].name) == 0) { + option_parameters = " <2|4>"; + } else if (strcmp("hand_voicing", long_options[i].name) == 0) { + option_parameters = " <VoicingFile>"; + } else if (strcmp("dump_pitch_e", long_options[i].name) == 0) { + option_parameters = " <Dump File>"; + } else if (strcmp("rate", long_options[i].name) == 0) { + option_parameters = " <3200|2400|1400|1300|1200>"; + } else if (strcmp("dump", long_options[i].name) == 0) { + option_parameters = " <DumpFilePrefix>"; + } else { + option_parameters = " <UNDOCUMENTED parameter>"; } + fprintf(stderr, "\t--%s%s\n", long_options[i].name, option_parameters); + } -} - -void print_help(const struct option* long_options, int num_opts, char* argv[]) -{ - int i; - char *option_parameters; - - fprintf(stderr, "\nCodec2 - low bit rate speech codec - Simulation Program\n" - "\thttp://rowetel.com/codec2.html\n\n" - "usage: %s [OPTIONS] <InputFile>\n\n" - "Options:\n" - "\t-o <OutputFile>\n", argv[0]); - for(i=0; i<num_opts-1; i++) { - if(long_options[i].has_arg == no_argument) { - option_parameters=""; - } else if (strcmp("lpc", long_options[i].name) == 0) { - option_parameters = " <Order>"; - } else if (strcmp("dec", long_options[i].name) == 0) { - option_parameters = " <2|4>"; - } else if (strcmp("hand_voicing", long_options[i].name) == 0) { - option_parameters = " <VoicingFile>"; - } else if (strcmp("dump_pitch_e", long_options[i].name) == 0) { - option_parameters = " <Dump File>"; - } else if (strcmp("rate", long_options[i].name) == 0) { - option_parameters = " <3200|2400|1400|1300|1200>"; - } else if (strcmp("dump", long_options[i].name) == 0) { - option_parameters = " <DumpFilePrefix>"; - } else { - option_parameters = " <UNDOCUMENTED parameter>"; - } - fprintf(stderr, "\t--%s%s\n", long_options[i].name, option_parameters); - } - - exit(1); + exit(1); } |
