aboutsummaryrefslogtreecommitdiff
path: root/src/codec2.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/codec2.c')
-rw-r--r--src/codec2.c292
1 files changed, 5 insertions, 287 deletions
diff --git a/src/codec2.c b/src/codec2.c
index 264141c..52602e3 100644
--- a/src/codec2.c
+++ b/src/codec2.c
@@ -83,11 +83,6 @@ void codec2_decode_1200(struct CODEC2 *c2, short speech[],
void codec2_encode_700c(struct CODEC2 *c2, unsigned char *bits, short speech[]);
void codec2_decode_700c(struct CODEC2 *c2, short speech[],
const unsigned char *bits);
-void codec2_encode_450(struct CODEC2 *c2, unsigned char *bits, short speech[]);
-void codec2_decode_450(struct CODEC2 *c2, short speech[],
- const unsigned char *bits);
-void codec2_decode_450pwb(struct CODEC2 *c2, short speech[],
- const unsigned char *bits);
static void ear_protection(float in_out[], int n);
/*---------------------------------------------------------------------------*\
@@ -110,7 +105,6 @@ static void ear_protection(float in_out[], int n);
\*---------------------------------------------------------------------------*/
-// Don't create CODEC2_MODE_450PWB for Encoding as it has undefined behavior !
struct CODEC2 *codec2_create(int mode) {
struct CODEC2 *c2;
int i, l;
@@ -125,9 +119,7 @@ struct CODEC2 *codec2_create(int mode) {
CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, mode) ||
CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, mode) ||
CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, mode) ||
- CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode) ||
- CODEC2_MODE_ACTIVE(CODEC2_MODE_450, mode) ||
- CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode))) {
+ CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, mode))) {
return NULL;
}
@@ -138,11 +130,7 @@ struct CODEC2 *codec2_create(int mode) {
/* store constants in a few places for convenience */
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, mode) == 0) {
- c2->c2const = c2const_create(8000, N_S);
- } else {
- c2->c2const = c2const_create(16000, N_S);
- }
+ c2->c2const = c2const_create(8000, N_S);
c2->Fs = c2->c2const.Fs;
int n_samp = c2->n_samp = c2->c2const.n_samp;
int m_pitch = c2->m_pitch = c2->c2const.m_pitch;
@@ -231,40 +219,10 @@ struct CODEC2 *codec2_create(int mode) {
c2->eq_en = false;
c2->Wo_left = 0.0;
c2->voicing_left = 0;
- ;
c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 0, NULL, NULL);
c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP1_PHASE_NFFT, 1, NULL, NULL);
}
- /* newamp2 initialisation */
-
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
- n2_mel_sample_freqs_kHz(c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K);
- int k;
- for (k = 0; k < NEWAMP2_K; k++) {
- c2->n2_prev_rate_K_vec_[k] = 0.0;
- }
- c2->Wo_left = 0.0;
- c2->voicing_left = 0;
- ;
- c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL);
- c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL);
- }
- /* newamp2 PWB initialisation */
-
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
- n2_mel_sample_freqs_kHz(c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K);
- int k;
- for (k = 0; k < NEWAMP2_16K_K; k++) {
- c2->n2_pwb_prev_rate_K_vec_[k] = 0.0;
- }
- c2->Wo_left = 0.0;
- c2->voicing_left = 0;
- ;
- c2->phase_fft_fwd_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 0, NULL, NULL);
- c2->phase_fft_inv_cfg = codec2_fft_alloc(NEWAMP2_PHASE_NFFT, 1, NULL, NULL);
- }
-
c2->fmlfeat = NULL;
c2->fmlmodel = NULL;
@@ -310,17 +268,6 @@ struct CODEC2 *codec2_create(int mode) {
c2->decode = codec2_decode_700c;
}
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
- c2->encode = codec2_encode_450;
- c2->decode = codec2_decode_450;
- }
-
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
- // Encode PWB doesn't make sense
- c2->encode = codec2_encode_450;
- c2->decode = codec2_decode_450pwb;
- }
-
return c2;
}
@@ -345,14 +292,6 @@ void codec2_destroy(struct CODEC2 *c2) {
codec2_fft_free(c2->phase_fft_fwd_cfg);
codec2_fft_free(c2->phase_fft_inv_cfg);
}
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) {
- codec2_fft_free(c2->phase_fft_fwd_cfg);
- codec2_fft_free(c2->phase_fft_inv_cfg);
- }
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
- codec2_fft_free(c2->phase_fft_fwd_cfg);
- codec2_fft_free(c2->phase_fft_inv_cfg);
- }
FREE(c2->Pn);
FREE(c2->Sn);
FREE(c2->w);
@@ -378,8 +317,6 @@ int codec2_bits_per_frame(struct CODEC2 *c2) {
if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 52;
if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 48;
if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 28;
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) return 18;
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) return 18;
return 0; /* shouldn't get here */
}
@@ -417,8 +354,6 @@ int codec2_samples_per_frame(struct CODEC2 *c2) {
if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) return 320;
if (CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) return 320;
if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) return 320;
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) return 320;
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) return 640;
return 0; /* shouldn't get here */
}
@@ -1640,26 +1575,6 @@ float codec2_energy_700c(struct CODEC2 *c2, const unsigned char *bits) {
return POW10F(mean / 10.0);
}
-float codec2_energy_450(struct CODEC2 *c2, const unsigned char *bits) {
- int indexes[4];
- unsigned int nbit = 0;
-
- assert(c2 != NULL);
-
- /* unpack bits from channel ------------------------------------*/
-
- indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
- // indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
- indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
- indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
-
- float mean = newamp2_energy_cb[0].cb[indexes[2]];
- mean -= 10;
- if (indexes[3] == 0) mean -= 10;
-
- return POW10F(mean / 10.0);
-}
-
/*---------------------------------------------------------------------------*\
FUNCTION....: codec2_get_energy()
@@ -1678,9 +1593,7 @@ float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) {
(CODEC2_MODE_ACTIVE(CODEC2_MODE_1400, c2->mode)) ||
(CODEC2_MODE_ACTIVE(CODEC2_MODE_1300, c2->mode)) ||
(CODEC2_MODE_ACTIVE(CODEC2_MODE_1200, c2->mode)) ||
- (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) ||
- (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode)) ||
- (CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)));
+ (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)));
MODEL model;
float xq_dec[2] = {};
int e_index, WoE_index;
@@ -1720,203 +1633,10 @@ float codec2_get_energy(struct CODEC2 *c2, const unsigned char *bits) {
if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
e = codec2_energy_700c(c2, bits);
}
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) ||
- CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
- e = codec2_energy_450(c2, bits);
- }
return e;
}
-/*---------------------------------------------------------------------------*\
-
- FUNCTION....: codec2_encode_450
- AUTHOR......: Thomas Kurin and Stefan Erhardt
- INSTITUTE...: Institute for Electronics Engineering, University of
-Erlangen-Nuremberg DATE CREATED: July 2018
-
- 450 bit/s codec that uses newamp2 fixed rate VQ of amplitudes.
-
- Encodes 320 speech samples (40ms of speech) into 28 bits.
-
- The codec2 algorithm actually operates internally on 10ms (80
- sample) frames, so we run the encoding algorithm four times:
-
- frame 0: nothing
- frame 1: nothing
- frame 2: nothing
- frame 3: 9 bit 1 stage VQ, 3 bits energy,
- 6 bit scalar Wo/voicing/plosive. No spare bits.
-
- If a plosive is detected the frame at the energy-step is encoded.
-
- Voicing is encoded using the 000000 index of the Wo quantiser.
- Plosive is encoded using the 111111 index of the Wo quantiser.
-
- The bit allocation is:
-
- Parameter frames 1-3 frame 4 Total
- -----------------------------------------------------------
- Harmonic magnitudes (rate k VQ) 0 9 9
- Energy 0 3 3
- log Wo/voicing/plosive 0 6 6
- TOTAL 0 18 18
-
-
-\*---------------------------------------------------------------------------*/
-
-void codec2_encode_450(struct CODEC2 *c2, unsigned char *bits, short speech[]) {
- MODEL model;
- int indexes[4], i, h, M = 4;
- unsigned int nbit = 0;
- int plosiv = 0;
- float energydelta[M];
- int spectralCounter;
-
- assert(c2 != NULL);
-
- memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8));
- for (i = 0; i < M; i++) {
- analyse_one_frame(c2, &model, &speech[i * c2->n_samp]);
- energydelta[i] = 0;
- spectralCounter = 0;
- for (h = 0; h < (model.L); h++) {
- // only detect above 300 Hz
- if (h * model.Wo * (c2->c2const.Fs / 2000.0) / M_PI > 0.3) {
- energydelta[i] =
- (double)energydelta[i] + (double)20.0 * log10(model.A[10] + 1E-16);
- spectralCounter = spectralCounter + 1;
- }
- }
- energydelta[i] = energydelta[i] / spectralCounter;
- }
- // Constants for plosive Detection tdB = threshold; minPwr = from below this
- // level plosives have to rise
- float tdB = 15; // not fixed can be changed
- float minPwr = 15; // not fixed can be changed
- if ((c2->energy_prev) < minPwr &&
- energydelta[0] > ((c2->energy_prev) + tdB)) {
- plosiv = 1;
- }
- if (energydelta[0] < minPwr && energydelta[1] > (energydelta[0] + tdB)) {
- plosiv = 2;
- }
- if (energydelta[1] < minPwr && energydelta[2] > (energydelta[1] + tdB)) {
- plosiv = 3;
- }
- if (energydelta[2] < minPwr && energydelta[3] > (energydelta[2] + tdB)) {
- plosiv = 4;
- }
- if (plosiv != 0 && plosiv != 4) {
- analyse_one_frame(c2, &model, &speech[(plosiv - 1) * c2->n_samp]);
- }
-
- c2->energy_prev = energydelta[3];
-
- int K = 29;
- float rate_K_vec[K], mean;
- float rate_K_vec_no_mean[K], rate_K_vec_no_mean_[K];
- if (plosiv > 0) {
- plosiv = 1;
- }
- newamp2_model_to_indexes(&c2->c2const, indexes, &model, rate_K_vec,
- c2->n2_rate_K_sample_freqs_kHz, K, &mean,
- rate_K_vec_no_mean, rate_K_vec_no_mean_, plosiv);
-
- pack_natural_or_gray(bits, &nbit, indexes[0], 9, 0);
- // pack_natural_or_gray(bits, &nbit, indexes[1], 9, 0);
- pack_natural_or_gray(bits, &nbit, indexes[2], 3, 0);
- pack_natural_or_gray(bits, &nbit, indexes[3], 6, 0);
-
- assert(nbit == (unsigned)codec2_bits_per_frame(c2));
-}
-
-/*---------------------------------------------------------------------------*\
-
- FUNCTION....: codec2_decode_450
- AUTHOR......: Thomas Kurin and Stefan Erhardt
- INSTITUTE...: Institute for Electronics Engineering, University of
-Erlangen-Nuremberg DATE CREATED: July 2018
-
-\*---------------------------------------------------------------------------*/
-
-void codec2_decode_450(struct CODEC2 *c2, short speech[],
- const unsigned char *bits) {
- MODEL model[4];
- int indexes[4];
- int i;
- unsigned int nbit = 0;
-
- assert(c2 != NULL);
-
- /* unpack bits from channel ------------------------------------*/
-
- indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
- // indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
- indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
- indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
-
- int M = 4;
- COMP HH[M][MAX_AMP + 1];
- float interpolated_surface_[M][NEWAMP2_K];
- int pwbFlag = 0;
-
- newamp2_indexes_to_model(
- &c2->c2const, model, (COMP *)HH, (float *)interpolated_surface_,
- c2->n2_prev_rate_K_vec_, &c2->Wo_left, &c2->voicing_left,
- c2->n2_rate_K_sample_freqs_kHz, NEWAMP2_K, c2->phase_fft_fwd_cfg,
- c2->phase_fft_inv_cfg, indexes, 1.5, pwbFlag);
-
- for (i = 0; i < M; i++) {
- synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], &HH[i][0],
- 1.5);
- }
-}
-
-/*---------------------------------------------------------------------------*\
-
- FUNCTION....: codec2_decode_450pwb
- AUTHOR......: Thomas Kurin and Stefan Erhardt
- INSTITUTE...: Institute for Electronics Engineering, University of
-Erlangen-Nuremberg DATE CREATED: July 2018
-
- Decodes the 450 codec data in pseudo wideband at 16kHz samplerate.
-
-\*---------------------------------------------------------------------------*/
-
-void codec2_decode_450pwb(struct CODEC2 *c2, short speech[],
- const unsigned char *bits) {
- MODEL model[4];
- int indexes[4];
- int i;
- unsigned int nbit = 0;
-
- assert(c2 != NULL);
-
- /* unpack bits from channel ------------------------------------*/
-
- indexes[0] = unpack_natural_or_gray(bits, &nbit, 9, 0);
- // indexes[1] = unpack_natural_or_gray(bits, &nbit, 9, 0);
- indexes[2] = unpack_natural_or_gray(bits, &nbit, 3, 0);
- indexes[3] = unpack_natural_or_gray(bits, &nbit, 6, 0);
-
- int M = 4;
- COMP HH[M][MAX_AMP + 1];
- float interpolated_surface_[M][NEWAMP2_16K_K];
- int pwbFlag = 1;
-
- newamp2_indexes_to_model(
- &c2->c2const, model, (COMP *)HH, (float *)interpolated_surface_,
- c2->n2_pwb_prev_rate_K_vec_, &c2->Wo_left, &c2->voicing_left,
- c2->n2_pwb_rate_K_sample_freqs_kHz, NEWAMP2_16K_K, c2->phase_fft_fwd_cfg,
- c2->phase_fft_inv_cfg, indexes, 1.5, pwbFlag);
-
- for (i = 0; i < M; i++) {
- synthesise_one_frame(c2, &speech[c2->n_samp * i], &model[i], &HH[i][0],
- 1.5);
- }
-}
-
/*---------------------------------------------------------------------------* \
FUNCTION....: synthesise_one_frame()
@@ -1931,10 +1651,8 @@ void synthesise_one_frame(struct CODEC2 *c2, short speech[], MODEL *model,
COMP Aw[], float gain) {
int i;
- if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode) ||
- CODEC2_MODE_ACTIVE(CODEC2_MODE_450, c2->mode) ||
- CODEC2_MODE_ACTIVE(CODEC2_MODE_450PWB, c2->mode)) {
- /* newamp1/2, we've already worked out rate L phase */
+ if (CODEC2_MODE_ACTIVE(CODEC2_MODE_700C, c2->mode)) {
+ /* newamp1, we've already worked out rate L phase */
COMP *H = Aw;
phase_synth_zero_order(c2->n_samp, model, &c2->ex_phase, H);
} else {