shallow zip-file copy from codec2 e9d726bf20

author: Author Name <[email protected]> 2023-07-07 12:20:59 +0930
committer: David Rowe <[email protected]> 2023-07-07 12:29:06 +0930
commit: ac7c48b4dee99d4c772f133d70d8d1b38262fcd2 (patch)
tree: a2d0ace57a9c0e2e5b611c4987f6fed1b38b81e7 /src/postfilter.c
1 files changed, 142 insertions, 0 deletions
diff --git a/src/postfilter.c b/src/postfilter.c
new file mode 100644
index 0000000..96f4f02
--- /dev/null
+++ b/src/postfilter.c
@@ -0,0 +1,142 @@
+/*---------------------------------------------------------------------------*\
+
+  FILE........: postfilter.c
+  AUTHOR......: David Rowe
+  DATE CREATED: 13/09/09
+
+  Postfilter to improve sound quality for speech with high levels of
+  background noise.  Unlike mixed-excitation models requires no bits
+  to be transmitted to handle background noise.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+  Copyright (C) 2009 David Rowe
+
+  All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License version 2.1, as
+  published by the Free Software Foundation.  This program is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+  License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "defines.h"
+#include "comp.h"
+#include "dump.h"
+#include "sine.h"
+#include "postfilter.h"
+
+/*---------------------------------------------------------------------------*\
+
+                                DEFINES
+
+\*---------------------------------------------------------------------------*/
+
+#define BG_THRESH 40.0     /* only consider low levels signals for bg_est */
+#define BG_BETA    0.1     /* averaging filter constant                   */
+#define BG_MARGIN  6.0     /* harmonics this far above BG noise are
+			      randomised.  Helped make bg noise less
+			      spikey (impulsive) for mmt1, but speech was
+                              perhaps a little rougher.
+			   */
+
+/*---------------------------------------------------------------------------*\
+
+  postfilter()
+
+  The post filter is designed to help with speech corrupted by
+  background noise.  The zero phase model tends to make speech with
+  background noise sound "clicky".  With high levels of background
+  noise the low level inter-formant parts of the spectrum will contain
+  noise rather than speech harmonics, so modelling them as voiced
+  (i.e. a continuous, non-random phase track) is inaccurate.
+
+  Some codecs (like MBE) have a mixed voicing model that breaks the
+  spectrum into voiced and unvoiced regions.  Several bits/frame
+  (5-12) are required to transmit the frequency selective voicing
+  information.  Mixed excitation also requires accurate voicing
+  estimation (parameter estimators always break occasionally under
+  exceptional conditions).
+
+  In our case we use a post filter approach which requires no
+  additional bits to be transmitted.  The decoder measures the average
+  level of the background noise during unvoiced frames.  If a harmonic
+  is less than this level it is made unvoiced by randomising it's
+  phases.
+
+  This idea is rather experimental.  Some potential problems that may
+  happen:
+
+  1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track
+     up to speech level?  This would be a bad thing.
+
+  2/ If background noise suddenly disappears from the source speech does
+     estimate drop quickly?  What is noise suddenly re-appears?
+
+  3/ Background noise with a non-flat sepctrum.  Current algorithm just
+     comsiders scpetrum as a whole, but this could be broken up into
+     bands, each with their own estimator.
+
+  4/ Males and females with the same level of background noise.  Check
+     performance the same.  Changing Wo affects width of each band, may
+     affect bg energy estimates.
+
+  5/ Not sure what happens during long periods of voiced speech
+     e.g. "sshhhhhhh"
+
+\*---------------------------------------------------------------------------*/
+
+void postfilter(
+  MODEL *model,
+  float *bg_est
+)
+{
+  int   m, uv;
+  float e, thresh;
+
+  /* determine average energy across spectrum */
+
+  e = 1E-12;
+  for(m=1; m<=model->L; m++)
+      e += model->A[m]*model->A[m];
+
+  assert(e > 0.0);
+  e = 10.0*log10f(e/model->L);
+
+  /* If beneath threshold, update bg estimate.  The idea
+     of the threshold is to prevent updating during high level
+     speech. */
+
+  if ((e < BG_THRESH) && !model->voiced)
+      *bg_est =  *bg_est*(1.0 - BG_BETA) + e*BG_BETA;
+
+  /* now mess with phases during voiced frames to make any harmonics
+     less then our background estimate unvoiced.
+  */
+
+  uv = 0;
+  thresh = POW10F((*bg_est + BG_MARGIN)/20.0);
+  if (model->voiced)
+      for(m=1; m<=model->L; m++)
+	  if (model->A[m] < thresh) {
+	      model->phi[m] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand();
+	      uv++;
+	  }
+
+#ifdef DUMP
+  dump_bg(e, *bg_est, 100.0*uv/model->L);
+#endif
+
+}
author	Author Name <[email protected]>	2023-07-07 12:20:59 +0930
committer	David Rowe <[email protected]>	2023-07-07 12:29:06 +0930
commit	ac7c48b4dee99d4c772f133d70d8d1b38262fcd2 (patch)
tree	a2d0ace57a9c0e2e5b611c4987f6fed1b38b81e7 /src/postfilter.c