diff options
Diffstat (limited to 'src/codec2.c')
| -rw-r--r-- | src/codec2.c | 292 |
1 files changed, 5 insertions, 287 deletions
diff --git a/src/codec2.c b/src/codec2.c index 264141c..52602e3 100644 --- a/src/codec2.c +++ b/src/codec2.c @@ -83,11 +83,6 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[], void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits, short speech[]); void codec2_decode_700c(struct CODEC2 *c2, short speech[], const unsigned char *bits); -void codec2_encode_450(struct CODEC2 *c2, unsigned char *bits, short speech[]); -void codec2_decode_450(struct CODEC2 *c2, short speech[], - const unsigned char *bits); -void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], - const unsigned char *bits); static void ear_protection(float in_out[], int n); /*---------------------------------------------------------------------------*\ @@ -110,7 +105,6 @@ static void ear_protection(float in_out[], int n); \*---------------------------------------------------------------------------*/ -// Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior ! struct CODEC2 *codec2_create(int mode) { struct CODEC2 *c2; int i, l; @@ -125,9 +119,7 @@ struct CODEC2 *codec2_create(int mode) { CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) || CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) || - CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode) || - CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode) || - CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode))) { + CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode))) { return NULL; } @@ -138,11 +130,7 @@ struct CODEC2 *codec2_create(int mode) { /* store constants in a few places for convenience */ - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0) { - c2->c2const = c2const_create(8000, N_S); - } else { - c2->c2const = c2const_create(16000, N_S); - } + c2->c2const = c2const_create(8000, N_S); c2->Fs = c2->c2const.Fs; int n_samp = c2->n_samp = c2->c2const.n_samp; int m_pitch = c2->m_pitch = c2->c2const.m_pitch; @@ -231,40 +219,10 @@ struct CODEC2 *codec2_create(int mode) { c2->eq_en = false; c2->Wo_left = 0.0; c2->voicing_left = 0; - ; c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL); c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL); } - /* newamp2 initialisation */ - - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { - n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K); - int k; - for (k = 0; k < NEWAMP2_K; k++) { - c2->n2_prev_rate_K_vec_[k] = 0.0; - } - c2->Wo_left = 0.0; - c2->voicing_left = 0; - ; - c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); - c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); - } - /* newamp2 PWB initialisation */ - - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { - n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K); - int k; - for (k = 0; k < NEWAMP2_16K_K; k++) { - c2->n2_pwb_prev_rate_K_vec_[k] = 0.0; - } - c2->Wo_left = 0.0; - c2->voicing_left = 0; - ; - c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL); - c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL); - } - c2->fmlfeat = NULL; c2->fmlmodel = NULL; @@ -310,17 +268,6 @@ struct CODEC2 *codec2_create(int mode) { c2->decode = codec2_decode_700c; } - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { - c2->encode = codec2_encode_450; - c2->decode = codec2_decode_450; - } - - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { - // Encode PWB doesn't make sense - c2->encode = codec2_encode_450; - c2->decode = codec2_decode_450pwb; - } - return c2; } @@ -345,14 +292,6 @@ void codec2_destroy(struct CODEC2 *c2) { codec2_fft_free(c2->phase_fft_fwd_cfg); codec2_fft_free(c2->phase_fft_inv_cfg); } - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) { - codec2_fft_free(c2->phase_fft_fwd_cfg); - codec2_fft_free(c2->phase_fft_inv_cfg); - } - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { - codec2_fft_free(c2->phase_fft_fwd_cfg); - codec2_fft_free(c2->phase_fft_inv_cfg); - } FREE(c2->Pn); FREE(c2->Sn); FREE(c2->w); @@ -378,8 +317,6 @@ int codec2_bits_per_frame(struct CODEC2 *c2) { if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 52; if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 48; if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 28; - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) return 18; - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) return 18; return 0; /* shouldn't get here */ } @@ -417,8 +354,6 @@ int codec2_samples_per_frame(struct CODEC2 *c2) { if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 320; if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 320; if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 320; - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) return 320; - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) return 640; return 0; /* shouldn't get here */ } @@ -1640,26 +1575,6 @@ float codec2_energy_700c(struct CODEC2 *c2, const unsigned char *bits) { return POW10F(mean / 10.0); } -float codec2_energy_450(struct CODEC2 *c2, const unsigned char *bits) { - int indexes[4]; - unsigned int nbit = 0; - - assert(c2 != NULL); - - /* unpack bits from channel ------------------------------------*/ - - indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); - // indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); - indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); - indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); - - float mean = newamp2_energy_cb[0].cb[indexes[2]]; - mean -= 10; - if (indexes[3] == 0) mean -= 10; - - return POW10F(mean / 10.0); -} - /*---------------------------------------------------------------------------*\ FUNCTION....: codec2_get_energy() @@ -1678,9 +1593,7 @@ float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) { (CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) || (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) || (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) || - (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) || - (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) || - (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode))); + (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode))); MODEL model; float xq_dec[2] = {}; int e_index, WoE_index; @@ -1720,203 +1633,10 @@ float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) { if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { e = codec2_energy_700c(c2, bits); } - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || - CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { - e = codec2_energy_450(c2, bits); - } return e; } -/*---------------------------------------------------------------------------*\ - - FUNCTION....: codec2_encode_450 - AUTHOR......: Thomas Kurin and Stefan Erhardt - INSTITUTE...: Institute for Electronics Engineering, University of -Erlangen-Nuremberg DATE CREATED: July 2018 - - 450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes. - - Encodes 320 speech samples (40ms of speech) into 28 bits. - - The codec2 algorithm actually operates internally on 10ms (80 - sample) frames, so we run the encoding algorithm four times: - - frame 0: nothing - frame 1: nothing - frame 2: nothing - frame 3: 9 bit 1 stage VQ, 3 bits energy, - 6 bit scalar Wo/voicing/plosive. No spare bits. - - If a plosive is detected the frame at the energy-step is encoded. - - Voicing is encoded using the 000000 index of the Wo quantiser. - Plosive is encoded using the 111111 index of the Wo quantiser. - - The bit allocation is: - - Parameter frames 1-3 frame 4 Total - ----------------------------------------------------------- - Harmonic magnitudes (rate k VQ) 0 9 9 - Energy 0 3 3 - log Wo/voicing/plosive 0 6 6 - TOTAL 0 18 18 - - -\*---------------------------------------------------------------------------*/ - -void codec2_encode_450(struct CODEC2 *c2, unsigned char *bits, short speech[]) { - MODEL model; - int indexes[4], i, h, M = 4; - unsigned int nbit = 0; - int plosiv = 0; - float energydelta[M]; - int spectralCounter; - - assert(c2 != NULL); - - memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); - for (i = 0; i < M; i++) { - analyse_one_frame(c2, &model, &speech[i * c2->n_samp]); - energydelta[i] = 0; - spectralCounter = 0; - for (h = 0; h < (model.L); h++) { - // only detect above 300 Hz - if (h * model.Wo * (c2->c2const.Fs / 2000.0) / M_PI > 0.3) { - energydelta[i] = - (double)energydelta[i] + (double)20.0 * log10(model.A[10] + 1E-16); - spectralCounter = spectralCounter + 1; - } - } - energydelta[i] = energydelta[i] / spectralCounter; - } - // Constants for plosive Detection tdB = threshold; minPwr = from below this - // level plosives have to rise - float tdB = 15; // not fixed can be changed - float minPwr = 15; // not fixed can be changed - if ((c2->energy_prev) < minPwr && - energydelta[0] > ((c2->energy_prev) + tdB)) { - plosiv = 1; - } - if (energydelta[0] < minPwr && energydelta[1] > (energydelta[0] + tdB)) { - plosiv = 2; - } - if (energydelta[1] < minPwr && energydelta[2] > (energydelta[1] + tdB)) { - plosiv = 3; - } - if (energydelta[2] < minPwr && energydelta[3] > (energydelta[2] + tdB)) { - plosiv = 4; - } - if (plosiv != 0 && plosiv != 4) { - analyse_one_frame(c2, &model, &speech[(plosiv - 1) * c2->n_samp]); - } - - c2->energy_prev = energydelta[3]; - - int K = 29; - float rate_K_vec[K], mean; - float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K]; - if (plosiv > 0) { - plosiv = 1; - } - newamp2_model_to_indexes(&c2->c2const, indexes, &model, rate_K_vec, - c2->n2_rate_K_sample_freqs_kHz, K, &mean, - rate_K_vec_no_mean, rate_K_vec_no_mean_, plosiv); - - pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0); - // pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0); - pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0); - pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0); - - assert(nbit == (unsigned)codec2_bits_per_frame(c2)); -} - -/*---------------------------------------------------------------------------*\ - - FUNCTION....: codec2_decode_450 - AUTHOR......: Thomas Kurin and Stefan Erhardt - INSTITUTE...: Institute for Electronics Engineering, University of -Erlangen-Nuremberg DATE CREATED: July 2018 - -\*---------------------------------------------------------------------------*/ - -void codec2_decode_450(struct CODEC2 *c2, short speech[], - const unsigned char *bits) { - MODEL model[4]; - int indexes[4]; - int i; - unsigned int nbit = 0; - - assert(c2 != NULL); - - /* unpack bits from channel ------------------------------------*/ - - indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); - // indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); - indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); - indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); - - int M = 4; - COMP HH[M][MAX_AMP + 1]; - float interpolated_surface_[M][NEWAMP2_K]; - int pwbFlag = 0; - - newamp2_indexes_to_model( - &c2->c2const, model, (COMP *)HH, (float *)interpolated_surface_, - c2->n2_prev_rate_K_vec_, &c2->Wo_left, &c2->voicing_left, - c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K, c2->phase_fft_fwd_cfg, - c2->phase_fft_inv_cfg, indexes, 1.5, pwbFlag); - - for (i = 0; i < M; i++) { - synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], &HH[i][0], - 1.5); - } -} - -/*---------------------------------------------------------------------------*\ - - FUNCTION....: codec2_decode_450pwb - AUTHOR......: Thomas Kurin and Stefan Erhardt - INSTITUTE...: Institute for Electronics Engineering, University of -Erlangen-Nuremberg DATE CREATED: July 2018 - - Decodes the 450 codec data in pseudo wideband at 16kHz samplerate. - -\*---------------------------------------------------------------------------*/ - -void codec2_decode_450pwb(struct CODEC2 *c2, short speech[], - const unsigned char *bits) { - MODEL model[4]; - int indexes[4]; - int i; - unsigned int nbit = 0; - - assert(c2 != NULL); - - /* unpack bits from channel ------------------------------------*/ - - indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0); - // indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0); - indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0); - indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0); - - int M = 4; - COMP HH[M][MAX_AMP + 1]; - float interpolated_surface_[M][NEWAMP2_16K_K]; - int pwbFlag = 1; - - newamp2_indexes_to_model( - &c2->c2const, model, (COMP *)HH, (float *)interpolated_surface_, - c2->n2_pwb_prev_rate_K_vec_, &c2->Wo_left, &c2->voicing_left, - c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K, c2->phase_fft_fwd_cfg, - c2->phase_fft_inv_cfg, indexes, 1.5, pwbFlag); - - for (i = 0; i < M; i++) { - synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], &HH[i][0], - 1.5); - } -} - /*---------------------------------------------------------------------------* \ FUNCTION....: synthesise_one_frame() @@ -1931,10 +1651,8 @@ void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model, COMP Aw[], float gain) { int i; - if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) || - CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) || - CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) { - /* newamp1/2, we've already worked out rate L phase */ + if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) { + /* newamp1, we've already worked out rate L phase */ COMP *H = Aw; phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H); } else { |
