aboutsummaryrefslogtreecommitdiff
path: root/src/postfilter.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/postfilter.c')
-rw-r--r--src/postfilter.c134
1 files changed, 134 insertions, 0 deletions
diff --git a/src/postfilter.c b/src/postfilter.c
new file mode 100644
index 0000000..d45dce1
--- /dev/null
+++ b/src/postfilter.c
@@ -0,0 +1,134 @@
+/*---------------------------------------------------------------------------*\
+
+ FILE........: postfilter.c
+ AUTHOR......: David Rowe
+ DATE CREATED: 13/09/09
+
+ Postfilter to improve sound quality for speech with high levels of
+ background noise. Unlike mixed-excitation models requires no bits
+ to be transmitted to handle background noise.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2009 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 2.1, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "postfilter.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "comp.h"
+#include "defines.h"
+#include "sine.h"
+
+/*---------------------------------------------------------------------------*\
+
+ DEFINES
+
+\*---------------------------------------------------------------------------*/
+
+#define BG_THRESH 40.0 /* only frames with energy below this (dB) update bg_est */
+#define BG_BETA 0.1 /* averaging filter constant: weight of newest frame in bg_est */
+#define BG_MARGIN \
+ 6.0 /* harmonics less than this far (dB) above the BG noise \
+ estimate are randomised. Helped make bg noise less \
+ spiky (impulsive) for mmt1, but speech was \
+ perhaps a little rougher. \
+ */
+
+/*---------------------------------------------------------------------------*\
+
+ postfilter()
+
+ The post filter is designed to help with speech corrupted by
+ background noise. The zero phase model tends to make speech with
+ background noise sound "clicky". With high levels of background
+ noise the low level inter-formant parts of the spectrum will contain
+ noise rather than speech harmonics, so modelling them as voiced
+ (i.e. a continuous, non-random phase track) is inaccurate.
+
+ Some codecs (like MBE) have a mixed voicing model that breaks the
+ spectrum into voiced and unvoiced regions. Several bits/frame
+ (5-12) are required to transmit the frequency selective voicing
+ information. Mixed excitation also requires accurate voicing
+ estimation (parameter estimators always break occasionally under
+ exceptional conditions).
+
+ In our case we use a post filter approach which requires no
+ additional bits to be transmitted. The decoder measures the average
+ level of the background noise during unvoiced frames. If a harmonic
+ is less than this level it is made unvoiced by randomising its
+ phase.
+
+ This idea is rather experimental. Some potential problems that may
+ happen:
+
+ 1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track
+ up to speech level? This would be a bad thing.
+
+ 2/ If background noise suddenly disappears from the source speech, does
+ the estimate drop quickly? What if noise suddenly re-appears?
+
+ 3/ Background noise with a non-flat spectrum. Current algorithm just
+ considers the spectrum as a whole, but this could be broken up into
+ bands, each with their own estimator.
+
+ 4/ Males and females with the same level of background noise. Check
+ performance the same. Changing Wo affects width of each band, may
+ affect bg energy estimates.
+
+ 5/ Not sure what happens during long periods of unvoiced speech
+ e.g. "sshhhhhhh"
+
+\*---------------------------------------------------------------------------*/
+
+void postfilter(MODEL *model, float *bg_est) {
+  /* model:  reads L, A[] and voiced; phi[] entries may be overwritten.
+     bg_est: running background level estimate (dB), updated in place. */
+
+  int m;
+  float e, thresh;
+
+  /* Average harmonic energy in dB: 10*log10 of the mean of A[m]^2 over
+     m = 1..L.  The 1E-12 seed keeps the sum positive when every
+     amplitude is zero. */
+  e = 1E-12;
+  for (m = 1; m <= model->L; m++) e += model->A[m] * model->A[m];
+
+  assert(e > 0.0);
+  e = 10.0 * log10f(e / model->L);
+
+  /* Update the background estimate only on unvoiced frames whose energy
+     is beneath BG_THRESH, so high level speech cannot drag it upwards. */
+  if ((e < BG_THRESH) && !model->voiced)
+    *bg_est = *bg_est * (1.0 - BG_BETA) + e * BG_BETA;
+
+  /* On voiced frames, give a random phase to every harmonic whose
+     amplitude is less than the background estimate plus BG_MARGIN,
+     which makes that harmonic unvoiced. */
+  thresh = POW10F((*bg_est + BG_MARGIN) / 20.0);
+  if (model->voiced) {
+    for (m = 1; m <= model->L; m++) {
+      if (model->A[m] < thresh) {
+        model->phi[m] = (TWO_PI / CODEC2_RAND_MAX) * (float)codec2_rand();
+      }
+    }
+  }
+}