aboutsummaryrefslogtreecommitdiff
path: root/src/phase.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/phase.c')
-rw-r--r--src/phase.c275
1 files changed, 275 insertions, 0 deletions
diff --git a/src/phase.c b/src/phase.c
new file mode 100644
index 0000000..dec8793
--- /dev/null
+++ b/src/phase.c
@@ -0,0 +1,275 @@
+/*---------------------------------------------------------------------------*\
+
+ FILE........: phase.c
+ AUTHOR......: David Rowe
+ DATE CREATED: 1/2/09
+
+ Functions for modelling and synthesising phase.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+ Copyright (C) 2009 David Rowe
+
+ All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License version 2.1, as
+ published by the Free Software Foundation. This program is
+ distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; if not,see <http://www.gnu.org/licenses/>.
+*/
+
+#include "phase.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "comp.h"
+#include "comp_prim.h"
+#include "defines.h"
+#include "kiss_fft.h"
+#include "sine.h"
+
+/*---------------------------------------------------------------------------*\
+
+ sample_phase()
+
+ Samples phase at centre of each harmonic from and array of FFT_ENC
+ DFT samples.
+
+\*---------------------------------------------------------------------------*/
+
+void sample_phase(MODEL *model, COMP H[],
+ COMP A[] /* LPC analysis filter in freq domain */
+) {
+ int m, b;
+ float r;
+
+ r = TWO_PI / (FFT_ENC);
+
+ /* Sample phase at harmonics */
+
+ for (m = 1; m <= model->L; m++) {
+ b = (int)(m * model->Wo / r + 0.5);
+ H[m] =
+ cconj(A[b]); /* synth filter 1/A is opposite phase to analysis filter */
+ }
+}
+
+/*---------------------------------------------------------------------------*\
+
+ phase_synth_zero_order()
+
+ Synthesises phases based on SNR and a rule based approach. No phase
+ parameters are required apart from the SNR (which can be reduced to a
+ 1 bit V/UV decision per frame).
+
+ The phase of each harmonic is modelled as the phase of a synthesis
+ filter excited by an impulse. In many Codec 2 modes the synthesis
+ filter is a LPC filter. Unlike the first order model the position
+ of the impulse is not transmitted, so we create an excitation pulse
+ train using a rule based approach.
+
+ Consider a pulse train with a pulse starting time n=0, with pulses
+ repeated at a rate of Wo, the fundamental frequency. A pulse train
+ in the time domain is equivalent to harmonics in the frequency
+ domain. We can make an excitation pulse train using a sum of
+ sinsusoids:
+
+ for(m=1; m<=L; m++)
+ ex[n] = cos(m*Wo*n)
+
+ Note: the Octave script ../octave/phase.m is an example of this if
+ you would like to try making a pulse train.
+
+ The phase of each excitation harmonic is:
+
+ arg(E[m]) = mWo
+
+ where E[m] are the complex excitation (freq domain) samples,
+ arg(x), just returns the phase of a complex sample x.
+
+ As we don't transmit the pulse position for this model, we need to
+ synthesise it. Now the excitation pulses occur at a rate of Wo.
+ This means the phase of the first harmonic advances by N_SAMP samples
+ over a synthesis frame of N_SAMP samples. For example if Wo is pi/20
+ (200 Hz), then over a 10ms frame (N_SAMP=80 samples), the phase of the
+ first harmonic would advance (pi/20)*80 = 4*pi or two complete
+ cycles.
+
+ We generate the excitation phase of the fundamental (first
+ harmonic):
+
+ arg[E[1]] = Wo*N_SAMP;
+
+ We then relate the phase of the m-th excitation harmonic to the
+ phase of the fundamental as:
+
+ arg(E[m]) = m*arg(E[1])
+
+ This E[m] then gets passed through the LPC synthesis filter to
+ determine the final harmonic phase.
+
+ Comparing to speech synthesised using original phases:
+
+ - Through headphones speech synthesised with this model is not as
+ good. Through a loudspeaker it is very close to original phases.
+
+ - If there are voicing errors, the speech can sound clicky or
+ staticy. If V speech is mistakenly declared UV, this model tends to
+ synthesise impulses or clicks, as there is usually very little shift or
+ dispersion through the LPC synthesis filter.
+
+ - When combined with LPC amplitude modelling there is an additional
+ drop in quality. I am not sure why, theory is interformant energy
+ is raised making any phase errors more obvious.
+
+ NOTES:
+
+ 1/ This synthesis model is effectively the same as a simple LPC-10
+ vocoders, and yet sounds much better. Why? Conventional wisdom
+ (AMBE, MELP) says mixed voicing is required for high quality
+ speech.
+
+ 2/ I am pretty sure the Lincoln Lab sinusoidal coding guys (like xMBE
+ also from MIT) first described this zero phase model, I need to look
+ up the paper.
+
+ 3/ Note that this approach could cause some discontinuities in
+ the phase at the edge of synthesis frames, as no attempt is made
+ to make sure that the phase tracks are continuous (the excitation
+ phases are continuous, but not the final phases after filtering
+ by the LPC spectra). Technically this is a bad thing. However
+ this may actually be a good thing, disturbing the phase tracks a
+ bit. More research needed, e.g. test a synthesis model that adds
+ a small delta-W to make phase tracks line up for voiced
+ harmonics.
+
+\*---------------------------------------------------------------------------*/
+
+void phase_synth_zero_order(
+ int n_samp, MODEL *model,
+ float *ex_phase, /* excitation phase of fundamental */
+ COMP H[] /* L synthesis filter freq domain samples */
+
+) {
+ int m;
+ float new_phi;
+ COMP Ex[MAX_AMP + 1]; /* excitation samples */
+ COMP A_[MAX_AMP + 1]; /* synthesised harmonic samples */
+
+ /*
+ Update excitation fundamental phase track, this sets the position
+ of each pitch pulse during voiced speech. After much experiment
+ I found that using just this frame's Wo improved quality for UV
+ sounds compared to interpolating two frames Wo like this:
+
+ ex_phase[0] += (*prev_Wo+model->Wo)*N_SAMP/2;
+ */
+
+ ex_phase[0] += (model->Wo) * n_samp;
+ ex_phase[0] -= TWO_PI * floorf(ex_phase[0] / TWO_PI + 0.5);
+
+ for (m = 1; m <= model->L; m++) {
+ /* generate excitation */
+
+ if (model->voiced) {
+ Ex[m].real = cosf(ex_phase[0] * m);
+ Ex[m].imag = sinf(ex_phase[0] * m);
+ } else {
+ /* When a few samples were tested I found that LPC filter
+ phase is not needed in the unvoiced case, but no harm in
+ keeping it.
+ */
+ float phi = TWO_PI * (float)codec2_rand() / CODEC2_RAND_MAX;
+ Ex[m].real = cosf(phi);
+ Ex[m].imag = sinf(phi);
+ }
+
+ /* filter using LPC filter */
+
+ A_[m].real = H[m].real * Ex[m].real - H[m].imag * Ex[m].imag;
+ A_[m].imag = H[m].imag * Ex[m].real + H[m].real * Ex[m].imag;
+
+ /* modify sinusoidal phase */
+
+ new_phi = atan2f(A_[m].imag, A_[m].real + 1E-12);
+ model->phi[m] = new_phi;
+ }
+}
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: mag_to_phase
+ AUTHOR......: David Rowe
+ DATE CREATED: Jan 2017
+
+ Algorithm for http://www.dsprelated.com/showcode/20.php ported to C. See
+ also Octave function mag_to_phase.m
+
+ Given a magnitude spectrum in dB, returns a minimum-phase phase
+ spectra.
+
+\*---------------------------------------------------------------------------*/
+
+void mag_to_phase(
+ float phase[], /* Nfft/2+1 output phase samples in radians */
+ float Gdbfk[], /* Nfft/2+1 positive freq amplitudes samples in dB */
+ int Nfft, codec2_fft_cfg fft_fwd_cfg, codec2_fft_cfg fft_inv_cfg) {
+ COMP Sdb[Nfft], c[Nfft], cf[Nfft], Cf[Nfft];
+ int Ns = Nfft / 2 + 1;
+ int i;
+
+ /* install negative frequency components, 1/Nfft takes into
+ account kiss fft lack of scaling on ifft */
+
+ Sdb[0].real = Gdbfk[0];
+ Sdb[0].imag = 0.0;
+ for (i = 1; i < Ns; i++) {
+ Sdb[i].real = Sdb[Nfft - i].real = Gdbfk[i];
+ Sdb[i].imag = Sdb[Nfft - i].imag = 0.0;
+ }
+
+ /* compute real cepstrum from log magnitude spectrum */
+
+ codec2_fft(fft_inv_cfg, Sdb, c);
+ for (i = 0; i < Nfft; i++) {
+ c[i].real /= (float)Nfft;
+ c[i].imag /= (float)Nfft;
+ }
+
+ /* Fold cepstrum to reflect non-min-phase zeros inside unit circle */
+
+ cf[0] = c[0];
+ for (i = 1; i < Ns - 1; i++) {
+ cf[i] = cadd(c[i], c[Nfft - i]);
+ }
+ cf[Ns - 1] = c[Ns - 1];
+ for (i = Ns; i < Nfft; i++) {
+ cf[i].real = 0.0;
+ cf[i].imag = 0.0;
+ }
+
+ /* Cf = dB_magnitude + j * minimum_phase */
+
+ codec2_fft(fft_fwd_cfg, cf, Cf);
+
+ /* The maths says we are meant to be using log(x), not 20*log10(x),
+ so we need to scale the phase to account for this:
+ log(x) = 20*log10(x)/scale */
+
+ float scale = (20.0 / logf(10.0));
+
+ for (i = 0; i < Ns; i++) {
+ phase[i] = Cf[i].imag / scale;
+ }
+}