diff options
| author | Author Name <[email protected]> | 2023-07-07 12:20:59 +0930 |
|---|---|---|
| committer | David Rowe <[email protected]> | 2023-07-07 12:29:06 +0930 |
| commit | ac7c48b4dee99d4c772f133d70d8d1b38262fcd2 (patch) | |
| tree | a2d0ace57a9c0e2e5b611c4987f6fed1b38b81e7 /src/postfilter.c | |
shallow zip-file copy from codec2 e9d726bf20
Diffstat (limited to 'src/postfilter.c')
| -rw-r--r-- | src/postfilter.c | 142 |
1 files changed, 142 insertions, 0 deletions
diff --git a/src/postfilter.c b/src/postfilter.c new file mode 100644 index 0000000..96f4f02 --- /dev/null +++ b/src/postfilter.c @@ -0,0 +1,142 @@ +/*---------------------------------------------------------------------------*\ + + FILE........: postfilter.c + AUTHOR......: David Rowe + DATE CREATED: 13/09/09 + + Postfilter to improve sound quality for speech with high levels of + background noise. Unlike mixed-excitation models requires no bits + to be transmitted to handle background noise. + +\*---------------------------------------------------------------------------*/ + +/* + Copyright (C) 2009 David Rowe + + All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License version 2.1, as + published by the Free Software Foundation. This program is + distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#include "defines.h" +#include "comp.h" +#include "dump.h" +#include "sine.h" +#include "postfilter.h" + +/*---------------------------------------------------------------------------*\ + + DEFINES + +\*---------------------------------------------------------------------------*/ + +#define BG_THRESH 40.0 /* only consider low levels signals for bg_est */ +#define BG_BETA 0.1 /* averaging filter constant */ +#define BG_MARGIN 6.0 /* harmonics this far above BG noise are + randomised. Helped make bg noise less + spikey (impulsive) for mmt1, but speech was + perhaps a little rougher. + */ + +/*---------------------------------------------------------------------------*\ + + postfilter() + + The post filter is designed to help with speech corrupted by + background noise. The zero phase model tends to make speech with + background noise sound "clicky". With high levels of background + noise the low level inter-formant parts of the spectrum will contain + noise rather than speech harmonics, so modelling them as voiced + (i.e. a continuous, non-random phase track) is inaccurate. + + Some codecs (like MBE) have a mixed voicing model that breaks the + spectrum into voiced and unvoiced regions. Several bits/frame + (5-12) are required to transmit the frequency selective voicing + information. Mixed excitation also requires accurate voicing + estimation (parameter estimators always break occasionally under + exceptional conditions). + + In our case we use a post filter approach which requires no + additional bits to be transmitted. The decoder measures the average + level of the background noise during unvoiced frames. If a harmonic + is less than this level it is made unvoiced by randomising it's + phases. + + This idea is rather experimental. Some potential problems that may + happen: + + 1/ If someone says "aaaaaaaahhhhhhhhh" will background estimator track + up to speech level? This would be a bad thing. + + 2/ If background noise suddenly disappears from the source speech does + estimate drop quickly? What is noise suddenly re-appears? + + 3/ Background noise with a non-flat sepctrum. Current algorithm just + comsiders scpetrum as a whole, but this could be broken up into + bands, each with their own estimator. + + 4/ Males and females with the same level of background noise. Check + performance the same. Changing Wo affects width of each band, may + affect bg energy estimates. + + 5/ Not sure what happens during long periods of voiced speech + e.g. "sshhhhhhh" + +\*---------------------------------------------------------------------------*/ + +void postfilter( + MODEL *model, + float *bg_est +) +{ + int m, uv; + float e, thresh; + + /* determine average energy across spectrum */ + + e = 1E-12; + for(m=1; m<=model->L; m++) + e += model->A[m]*model->A[m]; + + assert(e > 0.0); + e = 10.0*log10f(e/model->L); + + /* If beneath threshold, update bg estimate. The idea + of the threshold is to prevent updating during high level + speech. */ + + if ((e < BG_THRESH) && !model->voiced) + *bg_est = *bg_est*(1.0 - BG_BETA) + e*BG_BETA; + + /* now mess with phases during voiced frames to make any harmonics + less then our background estimate unvoiced. + */ + + uv = 0; + thresh = POW10F((*bg_est + BG_MARGIN)/20.0); + if (model->voiced) + for(m=1; m<=model->L; m++) + if (model->A[m] < thresh) { + model->phi[m] = (TWO_PI/CODEC2_RAND_MAX)*(float)codec2_rand(); + uv++; + } + +#ifdef DUMP + dump_bg(e, *bg_est, 100.0*uv/model->L); +#endif + +} |
