Diffstat (limited to 'src/kiss_fft.c')
-rw-r--r--   src/kiss_fft.c   426
1 file changed, 426 insertions, 0 deletions
diff --git a/src/kiss_fft.c b/src/kiss_fft.c
new file mode 100644
index 0000000..e7993cf
--- /dev/null
+++ b/src/kiss_fft.c
@@ -0,0 +1,426 @@
+/*
+Copyright (c) 2003-2010, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to
+endorse or promote products derived from this software without specific prior
+written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "_kiss_fft_guts.h"
+/* The guts header contains all the multiplication and addition macros that are
+   defined for fixed or floating point complex numbers. It also declares the kf_
+   internal functions.
+ */
+
+static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, int m) {
+  kiss_fft_cpx *Fout2;
+  kiss_fft_cpx *tw1 = st->twiddles;
+  kiss_fft_cpx t;
+  Fout2 = Fout + m;
+  do {
+    C_FIXDIV(*Fout, 2);
+    C_FIXDIV(*Fout2, 2);
+
+    C_MUL(t, *Fout2, *tw1);
+    tw1 += fstride;
+    C_SUB(*Fout2, *Fout, t);
+    C_ADDTO(*Fout, t);
+    ++Fout2;
+    ++Fout;
+  } while (--m);
+}
+
+static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, const size_t m) {
+  kiss_fft_cpx *tw1, *tw2, *tw3;
+  kiss_fft_cpx scratch[6];
+  size_t k = m;
+  const size_t m2 = 2 * m;
+  const size_t m3 = 3 * m;
+
+  tw3 = tw2 = tw1 = st->twiddles;
+
+  do {
+    C_FIXDIV(*Fout, 4);
+    C_FIXDIV(Fout[m], 4);
+    C_FIXDIV(Fout[m2], 4);
+    C_FIXDIV(Fout[m3], 4);
+
+    C_MUL(scratch[0], Fout[m], *tw1);
+    C_MUL(scratch[1], Fout[m2], *tw2);
+    C_MUL(scratch[2], Fout[m3], *tw3);
+
+    C_SUB(scratch[5], *Fout, scratch[1]);
+    C_ADDTO(*Fout, scratch[1]);
+    C_ADD(scratch[3], scratch[0], scratch[2]);
+    C_SUB(scratch[4], scratch[0], scratch[2]);
+    C_SUB(Fout[m2], *Fout, scratch[3]);
+    tw1 += fstride;
+    tw2 += fstride * 2;
+    tw3 += fstride * 3;
+    C_ADDTO(*Fout, scratch[3]);
+
+    if (st->inverse) {
+      Fout[m].r = scratch[5].r - scratch[4].i;
+      Fout[m].i = scratch[5].i + scratch[4].r;
+      Fout[m3].r = scratch[5].r + scratch[4].i;
+      Fout[m3].i = scratch[5].i - scratch[4].r;
+    } else {
+      Fout[m].r = scratch[5].r + scratch[4].i;
+      Fout[m].i = scratch[5].i - scratch[4].r;
+      Fout[m3].r = scratch[5].r - scratch[4].i;
+      Fout[m3].i = scratch[5].i + scratch[4].r;
+    }
+    ++Fout;
+  } while (--k);
+}
+
+static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, size_t m) {
+  size_t k = m;
+  const size_t m2 = 2 * m;
+  kiss_fft_cpx *tw1, *tw2;
+  kiss_fft_cpx scratch[5];
+  kiss_fft_cpx epi3;
+  epi3 = st->twiddles[fstride * m];
+
+  tw1 = tw2 = st->twiddles;
+
+  do {
+    C_FIXDIV(*Fout, 3);
+    C_FIXDIV(Fout[m], 3);
+    C_FIXDIV(Fout[m2], 3);
+
+    C_MUL(scratch[1], Fout[m], *tw1);
+    C_MUL(scratch[2], Fout[m2], *tw2);
+
+    C_ADD(scratch[3], scratch[1], scratch[2]);
+    C_SUB(scratch[0], scratch[1], scratch[2]);
+    tw1 += fstride;
+    tw2 += fstride * 2;
+
+    Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+    Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+    C_MULBYSCALAR(scratch[0], epi3.i);
+
+    C_ADDTO(*Fout, scratch[3]);
+
+    Fout[m2].r = Fout[m].r + scratch[0].i;
+    Fout[m2].i = Fout[m].i - scratch[0].r;
+
+    Fout[m].r -= scratch[0].i;
+    Fout[m].i += scratch[0].r;
+
+    ++Fout;
+  } while (--k);
+}
+
+static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, int m) {
+  kiss_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
+  int u;
+  kiss_fft_cpx scratch[13];
+  kiss_fft_cpx *twiddles = st->twiddles;
+  kiss_fft_cpx *tw;
+  kiss_fft_cpx ya, yb;
+  ya = twiddles[fstride * m];
+  yb = twiddles[fstride * 2 * m];
+
+  Fout0 = Fout;
+  Fout1 = Fout0 + m;
+  Fout2 = Fout0 + 2 * m;
+  Fout3 = Fout0 + 3 * m;
+  Fout4 = Fout0 + 4 * m;
+
+  tw = st->twiddles;
+  for (u = 0; u < m; ++u) {
+    C_FIXDIV(*Fout0, 5);
+    C_FIXDIV(*Fout1, 5);
+    C_FIXDIV(*Fout2, 5);
+    C_FIXDIV(*Fout3, 5);
+    C_FIXDIV(*Fout4, 5);
+    scratch[0] = *Fout0;
+
+    C_MUL(scratch[1], *Fout1, tw[u * fstride]);
+    C_MUL(scratch[2], *Fout2, tw[2 * u * fstride]);
+    C_MUL(scratch[3], *Fout3, tw[3 * u * fstride]);
+    C_MUL(scratch[4], *Fout4, tw[4 * u * fstride]);
+
+    C_ADD(scratch[7], scratch[1], scratch[4]);
+    C_SUB(scratch[10], scratch[1], scratch[4]);
+    C_ADD(scratch[8], scratch[2], scratch[3]);
+    C_SUB(scratch[9], scratch[2], scratch[3]);
+
+    Fout0->r += scratch[7].r + scratch[8].r;
+    Fout0->i += scratch[7].i + scratch[8].i;
+
+    scratch[5].r =
+        scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
+    scratch[5].i =
+        scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
+
+    scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
+    scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);
+
+    C_SUB(*Fout1, scratch[5], scratch[6]);
+    C_ADD(*Fout4, scratch[5], scratch[6]);
+
+    scratch[11].r =
+        scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
+    scratch[11].i =
+        scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
+    scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
+    scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);
+
+    C_ADD(*Fout2, scratch[11], scratch[12]);
+    C_SUB(*Fout3, scratch[11], scratch[12]);
+
+    ++Fout0;
+    ++Fout1;
+    ++Fout2;
+    ++Fout3;
+    ++Fout4;
+  }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride,
+                            const kiss_fft_cfg st, int m, int p) {
+  int u, k, q1, q;
+  kiss_fft_cpx *twiddles = st->twiddles;
+  kiss_fft_cpx t;
+  int Norig = st->nfft;
+
+  kiss_fft_cpx *scratch =
+      (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p);
+
+  for (u = 0; u < m; ++u) {
+    k = u;
+    for (q1 = 0; q1 < p; ++q1) {
+      scratch[q1] = Fout[k];
+      C_FIXDIV(scratch[q1], p);
+      k += m;
+    }
+
+    k = u;
+    for (q1 = 0; q1 < p; ++q1) {
+      int twidx = 0;
+      Fout[k] = scratch[0];
+      for (q = 1; q < p; ++q) {
+        twidx += fstride * k;
+        if (twidx >= Norig) twidx -= Norig;
+        C_MUL(t, scratch[q], twiddles[twidx]);
+        C_ADDTO(Fout[k], t);
+      }
+      k += m;
+    }
+  }
+  KISS_FFT_TMP_FREE(scratch);
+}
+
+static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
+                    const size_t fstride, int in_stride, int *factors,
+                    const kiss_fft_cfg st) {
+  kiss_fft_cpx *Fout_beg = Fout;
+  const int p = *factors++; /* the radix */
+  const int m = *factors++; /* stage's fft length/p */
+  const kiss_fft_cpx *Fout_end = Fout + p * m;
+
+#ifdef _OPENMP
+  // use openmp extensions at the
+  // top-level (not recursive)
+  if (fstride == 1 && p <= 5) {
+    int k;
+
+    // execute the p different work units in different threads
+#pragma omp parallel for
+    for (k = 0; k < p; ++k)
+      kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride,
+              factors, st);
+    // all threads have joined by this point
+
+    switch (p) {
+      case 2:
+        kf_bfly2(Fout, fstride, st, m);
+        break;
+      case 3:
+        kf_bfly3(Fout, fstride, st, m);
+        break;
+      case 4:
+        kf_bfly4(Fout, fstride, st, m);
+        break;
+      case 5:
+        kf_bfly5(Fout, fstride, st, m);
+        break;
+      default:
+        kf_bfly_generic(Fout, fstride, st, m, p);
+        break;
+    }
+    return;
+  }
+#endif
+
+  if (m == 1) {
+    do {
+      *Fout = *f;
+      f += fstride * in_stride;
+    } while (++Fout != Fout_end);
+  } else {
+    do {
+      // recursive call:
+      // DFT of size m*p performed by doing
+      // p instances of smaller DFTs of size m,
+      // each one takes a decimated version of the input
+      kf_work(Fout, f, fstride * p, in_stride, factors, st);
+      f += fstride * in_stride;
+    } while ((Fout += m) != Fout_end);
+  }
+
+  Fout = Fout_beg;
+
+  // recombine the p smaller DFTs
+  switch (p) {
+    case 2:
+      kf_bfly2(Fout, fstride, st, m);
+      break;
+    case 3:
+      kf_bfly3(Fout, fstride, st, m);
+      break;
+    case 4:
+      kf_bfly4(Fout, fstride, st, m);
+      break;
+    case 5:
+      kf_bfly5(Fout, fstride, st, m);
+      break;
+    default:
+      kf_bfly_generic(Fout, fstride, st, m, p);
+      break;
+  }
+}
+
+/* facbuf is populated by p1,m1,p2,m2, ...
+   where
+   p[i] * m[i] = m[i-1]
+   m0 = n */
+static void kf_factor(int n, int *facbuf) {
+  int p = 4;
+  double floor_sqrt;
+  floor_sqrt = floorf(sqrtf((double)n));
+
+  /*factor out powers of 4, powers of 2, then any remaining primes */
+  do {
+    while (n % p) {
+      switch (p) {
+        case 4:
+          p = 2;
+          break;
+        case 2:
+          p = 3;
+          break;
+        default:
+          p += 2;
+          break;
+      }
+      if (p > floor_sqrt) p = n; /* no more factors, skip to end */
+    }
+    n /= p;
+    *facbuf++ = p;
+    *facbuf++ = n;
+  } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * The return value is a contiguous block of memory, allocated with malloc. As
+ * such, It can be freed with free(), rather than a kiss_fft-specific function.
+ * */
+kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem,
+                            size_t *lenmem) {
+  kiss_fft_cfg st = NULL;
+  size_t memneeded = sizeof(struct kiss_fft_state) +
+                     sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/
+
+  if (lenmem == NULL) {
+    st = (kiss_fft_cfg)KISS_FFT_MALLOC(memneeded);
+  } else {
+    if (mem != NULL && *lenmem >= memneeded) st = (kiss_fft_cfg)mem;
+    *lenmem = memneeded;
+  }
+  if (st) {
+    int i;
+    st->nfft = nfft;
+    st->inverse = inverse_fft;
+
+    for (i = 0; i < nfft; ++i) {
+      const double pi =
+          3.141592653589793238462643383279502884197169399375105820974944;
+      double phase = -2 * pi * i / nfft;
+      if (st->inverse) phase *= -1;
+      kf_cexp(st->twiddles + i, phase);
+    }
+
+    kf_factor(nfft, st->factors);
+  }
+  return st;
+}
+
+void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin,
+                     kiss_fft_cpx *fout, int in_stride) {
+  if (fin == fout) {
+    // NOTE: this is not really an in-place FFT algorithm.
+    // It just performs an out-of-place FFT into a temp buffer
+    kiss_fft_cpx *tmpbuf =
+        (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft);
+    kf_work(tmpbuf, fin, 1, in_stride, st->factors, st);
+    memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft);
+    KISS_FFT_TMP_FREE(tmpbuf);
+  } else {
+    kf_work(fout, fin, 1, in_stride, st->factors, st);
+  }
+}
+
+void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) {
+  kiss_fft_stride(cfg, fin, fout, 1);
+}
+
+void kiss_fft_cleanup(void) {
+  // nothing needed any more
+}
+
+int kiss_fft_next_fast_size(int n) {
+  while (1) {
+    int m = n;
+    while ((m % 2) == 0) m /= 2;
+    while ((m % 3) == 0) m /= 3;
+    while ((m % 5) == 0) m /= 5;
+    if (m <= 1)
+      break; /* n is completely factorable by twos, threes, and fives */
+    n++;
+  }
+  return n;
+}
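For reference, kiss_fft_alloc(), kiss_fft() and kiss_fft_next_fast_size() in the file above are the whole public surface needed for a 1-D complex transform; the twiddles are precomputed at alloc time and kf_factor() records the radix schedule (for nfft = 1000 it would write the (p, m) pairs (4,250), (2,125), (5,25), (5,5), (5,1)). The sketch below is not part of the commit: it is a minimal usage example that assumes the project's public kiss_fft.h header (not shown in this diff) with its kiss_fft_scalar type, and it relies on the free()-compatible allocation described in the comment on kiss_fft_alloc().

/* Usage sketch (illustrative, not part of this diff): forward FFT of a
   real-valued ramp, using only the functions defined in kiss_fft.c. */
#include <stdio.h>
#include <stdlib.h>
#include "kiss_fft.h" /* assumed public header from the same tree */

int main(void) {
  /* Lengths that factor into 2s, 3s and 5s use the fast kf_bfly2/3/4/5
     butterflies rather than kf_bfly_generic; 1000 = 2^3 * 5^3 already does. */
  const int nfft = kiss_fft_next_fast_size(1000);
  kiss_fft_cfg cfg = kiss_fft_alloc(nfft, 0 /* 0 = forward, 1 = inverse */, NULL, NULL);
  kiss_fft_cpx *in = malloc(sizeof(kiss_fft_cpx) * nfft);
  kiss_fft_cpx *out = malloc(sizeof(kiss_fft_cpx) * nfft);
  int i;

  for (i = 0; i < nfft; ++i) { /* real-valued ramp as test input */
    in[i].r = (kiss_fft_scalar)i;
    in[i].i = 0;
  }

  kiss_fft(cfg, in, out); /* out[k] now holds the k-th DFT bin */
  printf("DC bin: %f %+fi\n", (double)out[0].r, (double)out[0].i);

  free(in);
  free(out);
  free(cfg); /* single contiguous block, per the kiss_fft_alloc() comment */
  kiss_fft_cleanup(); /* currently a no-op */
  return 0;
}

Passing a preallocated buffer plus its size through the mem/lenmem arguments of kiss_fft_alloc() avoids the internal malloc entirely, which is the intended path for callers that manage their own memory.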
