From ac7c48b4dee99d4c772f133d70d8d1b38262fcd2 Mon Sep 17 00:00:00 2001 From: Author Name Date: Fri, 7 Jul 2023 12:20:59 +0930 Subject: shallow zip-file copy from codec2 e9d726bf20 --- src/c2sim.c | 1168 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1168 insertions(+) create mode 100644 src/c2sim.c (limited to 'src/c2sim.c') diff --git a/src/c2sim.c b/src/c2sim.c new file mode 100644 index 0000000..01ebba1 --- /dev/null +++ b/src/c2sim.c @@ -0,0 +1,1168 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: c2sim.c + AUTHOR......: David Rowe + DATE CREATED: 20/8/2010 + + Codec2 simulation. Combines encoder and decoder and allows + switching in and out various algorithms and quantisation steps. Used + for algorithm development. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see . 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "defines.h" +#include "sine.h" +#include "nlp.h" +#include "dump.h" +#include "lpc.h" +#include "lsp.h" +#include "quantise.h" +#include "phase.h" +#include "postfilter.h" +#include "interp.h" +#include "bpf.h" +#include "bpfb.h" +#include "newamp1.h" +#include "lpcnet_freq.h" +#include "sd.h" + +void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain); +void print_help(const struct option *long_options, int num_opts, char* argv[]); + +#define N_SAMP n_samp /* quick fix for run time sample rate selection */ + +/*---------------------------------------------------------------------------*\ + + MAIN + +\*---------------------------------------------------------------------------*/ + +int main(int argc, char *argv[]) +{ + + int Fs = 8000; + int set_fs; + + int lpc_model = 0, order = LPC_ORD; + int lsp = 0, lspd = 0, lspvq = 0; + int lspjmv = 0; + int prede = 0; + int postfilt; + int hand_voicing = 0, hi = 0, simlpcpf = 0, modelin=0, modelout=0; + int lpcpf = 0; + FILE *fvoicing = 0; + int dec; + int decimate = 1; + int amread, Woread, pahw; + int awread; + int hmread; + int phase0 = 0; + int scalar_quant_Wo_e = 0; + int scalar_quant_Wo_e_low = 0; + int vector_quant_Wo_e = 0; + int dump_pitch_e = 0; + float gain = 1.0; + int bpf_en = 0; + int bpfb_en = 0; + FILE *fam = NULL, *fWo = NULL; + FILE *faw = NULL; + FILE *fhm = NULL; + FILE *fjmv = NULL; + FILE *flspEWov = NULL; + FILE *ften_ms_centre = NULL; + FILE *fmodelout = NULL; + FILE *fmodelin = NULL; + #ifdef DUMP + int dump; + #endif + char out_file[MAX_STR]; + FILE *fout = NULL; /* output speech file */ + int rateK = 0, newamp1vq = 0, rate_K_dec = 0, perframe=0; + int bands = 0, bands_lower_en; + float bands_lower = -1E32; + int K = 20; + float framelength_s = N_S; + int lspEWov = 0, rateKWov = 0, first = 0; + FILE 
*frateKWov = NULL; + int ten_ms_centre = 0; + FILE *fphasenn = NULL; + FILE *frateK = NULL; + FILE *frateKin = NULL; + int rateKout, rateKin; + FILE *fbands = NULL; + int bands_resample = 0; + + char* opt_string = "ho:"; + struct option long_options[] = { + { "Fs", required_argument, &set_fs, 1 }, + { "rateK", no_argument, &rateK, 1 }, + { "perframe", no_argument, &perframe, 1 }, + { "newamp1vq", no_argument, &newamp1vq, 1 }, + { "rateKdec", required_argument, &rate_K_dec, 1 }, + { "rateKout", required_argument, &rateKout, 1 }, + { "rateKin", required_argument, &rateKin, 1 }, + { "bands",required_argument, &bands, 1 }, + { "bands_lower",required_argument, &bands_lower_en, 1 }, + { "bands_resample", no_argument, &bands_resample, 1 }, + { "lpc", required_argument, &lpc_model, 1 }, + { "lsp", no_argument, &lsp, 1 }, + { "lspd", no_argument, &lspd, 1 }, + { "lspvq", no_argument, &lspvq, 1 }, + { "lspjmv", no_argument, &lspjmv, 1 }, + { "phase0", no_argument, &phase0, 1 }, + { "postfilter", no_argument, &postfilt, 1 }, + { "hand_voicing", required_argument, &hand_voicing, 1 }, + { "dec", required_argument, &dec, 1 }, + { "hi", no_argument, &hi, 1 }, + { "simlpcpf", no_argument, &simlpcpf, 1 }, + { "lpcpf", no_argument, &lpcpf, 1 }, + { "prede", no_argument, &prede, 1 }, + { "dump_pitch_e", required_argument, &dump_pitch_e, 1 }, + { "sq_pitch_e", no_argument, &scalar_quant_Wo_e, 1 }, + { "sq_pitch_e_low", no_argument, &scalar_quant_Wo_e_low, 1 }, + { "vq_pitch_e", no_argument, &vector_quant_Wo_e, 1 }, + { "rate", required_argument, NULL, 0 }, + { "gain", required_argument, NULL, 0 }, + { "bpf", no_argument, &bpf_en, 1 }, + { "bpfb", no_argument, &bpfb_en, 1 }, + { "amread", required_argument, &amread, 1 }, + { "hmread", required_argument, &hmread, 1 }, + { "awread", required_argument, &awread, 1 }, + { "Woread", required_argument, &Woread, 1 }, + { "pahw", required_argument, &pahw, 1 }, + { "lspEWov", required_argument, &lspEWov, 1 }, + { "rateKWov", required_argument, 
&rateKWov, 1 }, + { "first", no_argument, &first, 1 }, + { "ten_ms_centre", required_argument, &ten_ms_centre, 1 }, + { "framelength_s", required_argument, NULL, 0 }, + { "modelout", required_argument, &modelout, 1 }, + { "modelin", required_argument, &modelin, 1 }, + #ifdef DUMP + { "dump", required_argument, &dump, 1 }, + #endif + { "help", no_argument, NULL, 'h' }, + { NULL, no_argument, NULL, 0 } + }; + int num_opts=sizeof(long_options)/sizeof(struct option); + + /*----------------------------------------------------------------*\ + + Interpret Command Line Arguments + + \*----------------------------------------------------------------*/ + + if (argc < 2) { + print_help(long_options, num_opts, argv); + } + + while(1) { + int option_index = 0; + int opt = getopt_long(argc, argv, opt_string, + long_options, &option_index); + if (opt == -1) + break; + switch (opt) { + case 0: + if(strcmp(long_options[option_index].name, "Fs") == 0) { + Fs= atoi(optarg); + if((Fs != 8000) && (Fs != 16000)) { + fprintf(stderr, "Error Fs must be 8000 or 16000\n"); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "lpc") == 0) { + order = atoi(optarg); + #ifdef DUMP + } else if(strcmp(long_options[option_index].name, "dump") == 0) { + if (dump) + dump_on(optarg); + #endif + } else if(strcmp(long_options[option_index].name, "lsp") == 0 + || strcmp(long_options[option_index].name, "lspd") == 0 + || strcmp(long_options[option_index].name, "lspvq") == 0) { + assert(order == LPC_ORD); + } else if(strcmp(long_options[option_index].name, "rateKdec") == 0) { + rate_K_dec = atoi(optarg); + fprintf(stderr, "rate_K_dec: %d\n", rate_K_dec); + } else if(strcmp(long_options[option_index].name, "rateKout") == 0) { + /* read model records from file or stdin */ + if ((frateK = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening output rateK file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + fprintf(stderr, "each record is %d bytes\n", (int)(K*sizeof(float))); + 
} else if(strcmp(long_options[option_index].name, "rateKin") == 0) { + /* read model records from file or stdin */ + if ((frateKin = fopen(optarg,"rb")) == NULL) { + fprintf(stderr, "Error opening input rateK file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + fprintf(stderr, "each record is %d bytes\n", (int)(K*sizeof(float))); + } else if(strcmp(long_options[option_index].name, "bands") == 0) { + /* write mel spaced band energies to file or stdout */ + if ((fbands = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening bands file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "bands_lower") == 0) { + bands_lower = atof(optarg); + fprintf(stderr, "bands_lower: %f\n", bands_lower); + } else if(strcmp(long_options[option_index].name, "dec") == 0) { + + decimate = atoi(optarg); + if ((decimate != 2) && (decimate != 3) && (decimate != 4)) { + fprintf(stderr, "Error in --dec, must be 2, 3, or 4\n"); + exit(1); + } + + if (!phase0) { + fprintf(stderr, "needs --phase0 to resample phase when using --dec\n"); + exit(1); + } + if (!lpc_model) { + fprintf(stderr, "needs --lpc [order] to resample amplitudes when using --dec\n"); + exit(1); + } + + } else if(strcmp(long_options[option_index].name, "hand_voicing") == 0) { + if ((fvoicing = fopen(optarg,"rt")) == NULL) { + fprintf(stderr, "Error opening voicing file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "Woread") == 0) { + if ((fWo = fopen(optarg,"rb")) == NULL) { + fprintf(stderr, "Error opening float Wo file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "amread") == 0) { + if ((fam = fopen(optarg,"rb")) == NULL) { + fprintf(stderr, "Error opening float Am file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "hmread") == 0) { + if ((fhm = fopen(optarg,"rb")) == 
NULL) { + fprintf(stderr, "Error opening float Hm file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "awread") == 0) { + if ((faw = fopen(optarg,"rb")) == NULL) { + fprintf(stderr, "Error opening float Aw file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "dump_pitch_e") == 0) { + if ((fjmv = fopen(optarg,"wt")) == NULL) { + fprintf(stderr, "Error opening pitch & energy dump file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "gain") == 0) { + gain = atof(optarg); + } else if(strcmp(long_options[option_index].name, "framelength_s") == 0) { + framelength_s = atof(optarg); + } else if(strcmp(long_options[option_index].name, "pahw") == 0) { + + /* set up a bunch of arguments instead of having to enter them on cmd line every time */ + + phase0 = postfilt = amread = hmread = Woread = 1; + char file_name[MAX_STR]; + sprintf(file_name, "%s_am.out", optarg); + fprintf(stderr, "reading %s", file_name); + if ((fam = fopen(file_name,"rb")) == NULL) { + fprintf(stderr, "Error opening float Am file: %s: %s.\n", + file_name, strerror(errno)); + exit(1); + } + sprintf(file_name, "%s_hm.out", optarg); + fprintf(stderr, " %s", file_name); + if ((fhm = fopen(file_name,"rb")) == NULL) { + fprintf(stderr, "Error opening float Hm file: %s: %s.\n", + file_name, strerror(errno)); + exit(1); + } + sprintf(file_name, "%s_Wo.out", optarg); + fprintf(stderr, " %s\n", file_name); + if ((fWo = fopen(file_name,"rb")) == NULL) { + fprintf(stderr, "Error opening float Wo file: %s: %s.\n", + file_name, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "lspEWov") == 0) { + /* feature file for deep learning experiments */ + lpc_model = 1; phase0 = 1; + if ((flspEWov = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening lspEWov float file: %s: %s\n", + optarg, 
strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "rateKWov") == 0) { + /* feature file for deep learning experiments */ + rateK = 1; newamp1vq = 1; + if ((frateKWov = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening rateKWov float file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "ten_ms_centre") == 0) { + /* dump 10ms of audio centred on analysis frame to check time alignment with + 16 kHz source audio */ + ten_ms_centre = 1; + if ((ften_ms_centre = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening ten_ms_centre short file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + } else if(strcmp(long_options[option_index].name, "modelout") == 0) { + /* write model records to file or stdout */ + modelout = 1; + if (strcmp(optarg, "-") == 0) fmodelout = stdout; + else if ((fmodelout = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening modelout file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + fprintf(stderr, "each model record is %d bytes\n", (int)sizeof(MODEL)); + } else if(strcmp(long_options[option_index].name, "modelin") == 0) { + /* read model records from file or stdin */ + modelin = 1; + if (strcmp(optarg, "-") == 0) fmodelin = stdin; + else if ((fmodelin = fopen(optarg,"rb")) == NULL) { + fprintf(stderr, "Error opening modelin file: %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + fprintf(stderr, "each model record is %d bytes\n", (int)sizeof(MODEL)); + } else if(strcmp(long_options[option_index].name, "rate") == 0) { + if(strcmp(optarg,"3200") == 0) { + lpc_model = 1; + scalar_quant_Wo_e = 1; + lspd = 1; + phase0 = 1; + postfilt = 1; + decimate = 1; + lpcpf = 1; + } else if(strcmp(optarg,"2400") == 0) { + lpc_model = 1; + vector_quant_Wo_e = 1; + lsp = 1; + phase0 = 1; + postfilt = 1; + decimate = 2; + lpcpf = 1; + } else if(strcmp(optarg,"1400") == 0) { + lpc_model = 1; + vector_quant_Wo_e = 1; + lsp 
= 1; + phase0 = 1; + postfilt = 1; + decimate = 4; + lpcpf = 1; + } else if(strcmp(optarg,"1300") == 0) { + lpc_model = 1; + scalar_quant_Wo_e = 1; + lsp = 1; + phase0 = 1; + postfilt = 1; + decimate = 4; + lpcpf = 1; + } else if(strcmp(optarg,"1200") == 0) { + lpc_model = 1; + scalar_quant_Wo_e = 1; + lspjmv = 1; + phase0 = 1; + postfilt = 1; + decimate = 4; + lpcpf = 1; + } else { + fprintf(stderr, "Error: invalid output rate (3200|2400|1400|1200) %s\n", optarg); + exit(1); + } + } + break; + + case 'h': + print_help(long_options, num_opts, argv); + break; + + case 'o': + if (strcmp(optarg, "-") == 0) fout = stdout; + else if ((fout = fopen(optarg,"wb")) == NULL) { + fprintf(stderr, "Error opening output speech file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } + strcpy(out_file,optarg); + break; + + default: + /* This will never be reached */ + break; + } + } + + /* Input file */ + + FILE *fin; /* input speech file */ + if (strcmp(argv[optind], "-") == 0) fin = stdin; + else if ((fin = fopen(argv[optind],"rb")) == NULL) { + fprintf(stderr, "Error opening input speech file: %s: %s.\n", + argv[optind], strerror(errno)); + exit(1); + } + + C2CONST c2const = c2const_create(Fs, framelength_s); + int n_samp = c2const.n_samp; + int m_pitch = c2const.m_pitch; + + short buf[N_SAMP]; /* input/output buffer */ + float buf_float[N_SAMP]; + float Sn[m_pitch]; /* float input speech samples */ + float Sn_pre[m_pitch]; /* pre-emphasised input speech samples */ + COMP Sw[FFT_ENC]; /* DFT of Sn[] */ + codec2_fft_cfg fft_fwd_cfg; + codec2_fftr_cfg fftr_fwd_cfg; + codec2_fftr_cfg fftr_inv_cfg; + float w[m_pitch]; /* time domain hamming window */ + float W[FFT_ENC]; /* DFT of w[] */ + MODEL model; + float Pn[2*N_SAMP]; /* trapezoidal synthesis window */ + float Sn_[2*N_SAMP]; /* synthesised speech */ + int i,m; /* loop variable */ + int frames; + float prev_f0; + float pitch; + float snr; + float sum_snr; + + float pre_mem = 0.0, de_mem = 0.0; + float ak[1+order]; + // 
COMP Sw_[FFT_ENC]; + // COMP Ew[FFT_ENC]; + + float ex_phase[MAX_AMP+1]; + + float bg_est = 0.0; + + + MODEL prev_model; + float lsps[order]; + float e, prev_e; + int lsp_indexes[order]; + float lsps_[order]; + float Woe_[2]; + + float lsps_dec[4][order], e_dec[4], weight, weight_inc, ak_dec[4][order]; + MODEL model_dec[4], prev_model_dec; + float prev_lsps_dec[order], prev_e_dec; + + void *nlp_states; + float hpf_states[2]; + #if 0 + struct PEXP *pexp = NULL; + struct AEXP *aexp = NULL; + #endif + float bpf_buf[BPF_N+N_SAMP]; + + COMP Aw[FFT_ENC]; + COMP H[MAX_AMP]; + + float sd_sum = 0.0; int sd_frames = 0; + + for(i=0; i bands_lower) + assert(fwrite(bandE, sizeof(float), nbands, fbands) == nbands); + // optionally reconstruct [Am} by linear interpolation of band energies, + // this doesn't sound very Good + if (bands_resample) + resample_rate_L(&c2const, &model, &bandE[1], &freqkHz[1], nbands-2); + } + + /*------------------------------------------------------------*\ + + Optional newamp1 simulation, as used in 700C + + \*------------------------------------------------------------*/ + + if (rateK) { + float rate_K_vec[K]; + resample_const_rate_f(&c2const, &model, rate_K_vec, rate_K_sample_freqs_kHz, K); + + if (frateK != NULL) + assert(fwrite(rate_K_vec, sizeof(float), K, frateK) == K); + + if (frateKin != NULL) { + assert(fread(rate_K_vec, sizeof(float), K, frateKin) == K); + /* apply newamp1 postfilter - this helped male samples with VQVAE work */ + float sum = 0.0; + for(int k=0; k bands_lower) + fwrite(&model_dec[i],sizeof(MODEL),1,fmodelout); + } + else + fwrite(&model_dec[i],sizeof(MODEL),1,fmodelout); + } + } + + /* update memories for next frame ----------------------------*/ + + prev_model_dec = model_dec[decimate-1]; + prev_e_dec = e_dec[decimate-1]; + for(i=0; i{Am} SNR av: %5.2f dB over %d frames\n", sum_snr/frames, frames); + if (lsp || lspd || lspjmv) + fprintf(stderr, "LSP quantiser SD: %5.2f dB*dB over %d frames\n", sd_sum/sd_frames, sd_frames); 
+    }
+    if (newamp1vq) {
+        fprintf(stderr, "var: %3.2f dB*dB\n", se/nse);
+    }
+    #ifdef DUMP
+    if (dump)
+        dump_off();
+    #endif
+
+    if (hand_voicing)
+        fclose(fvoicing);
+
+    nlp_destroy(nlp_states);
+
+    if (fam != NULL) fclose(fam);
+    if (fWo != NULL) fclose(fWo);
+    if (faw != NULL) fclose(faw);
+    if (fhm != NULL) fclose(fhm);
+    if (fjmv != NULL) fclose(fjmv);
+    if (flspEWov != NULL) fclose(flspEWov);
+    if (fphasenn != NULL) fclose(fphasenn);
+    if (frateK != NULL) fclose(frateK);
+    if (frateKin != NULL) fclose(frateKin);
+    if (ften_ms_centre != NULL) fclose(ften_ms_centre);
+    if (fmodelout != NULL) fclose(fmodelout);
+    if (fbands != NULL) fclose(fbands);
+    if (frateKWov != NULL) fclose(frateKWov);
+
+    return 0;
+}
+
+/*---------------------------------------------------------------------------*\
+
+  synth_one_frame()
+
+  Produces one frame of 16 bit output samples from a set of model
+  parameters:
+
+    1. synthesise() is called with the inverse real FFT config, the float
+       working buffer Sn_[], the model and the window Pn[] (the meaning of
+       its final argument, 1, is defined by synthesise() and is not visible
+       from this file).
+    2. if prede is non-zero, Sn_[] is passed through de_emp() in place,
+       with the filter memory kept in *de_mem between calls.
+    3. the first n_samp samples of Sn_[] are multiplied by gain (in place),
+       limited to +/-32767 and stored in buf[].
+
+  n_samp        number of samples written to buf[]
+  fftr_inv_cfg  inverse real FFT config handed to synthesise()
+  buf           output, at least n_samp shorts
+  model         model parameters handed to synthesise()
+  Sn_           float synthesis buffer, modified by this function
+  Pn            synthesis window handed to synthesise()
+  prede         non-zero to apply de_emp() before output
+  de_mem        de-emphasis filter memory, updated when prede is set
+  gain          linear gain applied to each output sample
+
+  NOTE(review): the loop condition, the gain scaling and the start of the
+  upper clip test were missing from this copy of the patch (text between
+  '<' and '>' had been stripped); they are restored below from context -
+  confirm against the upstream c2sim.c.
+
+\*---------------------------------------------------------------------------*/
+
+void synth_one_frame(int n_samp, codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[],
+                     float Pn[], int prede, float *de_mem, float gain)
+{
+    int i;
+
+    synthesise(n_samp, fftr_inv_cfg, Sn_, model, Pn, 1);
+    if (prede)
+        de_emp(Sn_, Sn_, de_mem, n_samp);
+
+    for(i=0; i<n_samp; i++) {
+        Sn_[i] *= gain;
+
+        /* limit before the float to short conversion: converting a value
+           that does not fit in the destination type is undefined */
+        if (Sn_[i] > 32767.0)
+            buf[i] = 32767;
+        else if (Sn_[i] < -32767.0)
+            buf[i] = -32767;
+        else
+            buf[i] = Sn_[i];
+    }
+
+}
+
+void print_help(const struct option* long_options, int num_opts, char* argv[])
+{
+    int i;
+    char *option_parameters;
+
+    fprintf(stderr, "\nCodec2 - low bit rate speech codec - Simulation Program\n"
+            "\thttp://rowetel.com/codec2.html\n\n"
+            "usage: %s [OPTIONS] \n\n"
+            "Options:\n"
+            "\t-o \n", argv[0]);
+    for(i=0; i