path: root/src/kiss_fft.c
Diffstat (limited to 'src/kiss_fft.c')
-rw-r--r--  src/kiss_fft.c  426
1 file changed, 426 insertions, 0 deletions
diff --git a/src/kiss_fft.c b/src/kiss_fft.c
new file mode 100644
index 0000000..e7993cf
--- /dev/null
+++ b/src/kiss_fft.c
@@ -0,0 +1,426 @@
+/*
+Copyright (c) 2003-2010, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+ * Neither the author nor the names of any contributors may be used to
+endorse or promote products derived from this software without specific prior
+written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "_kiss_fft_guts.h"
+/* The guts header contains all the multiplication and addition macros that are
+ defined for fixed or floating point complex numbers. It also declares the kf_
+ internal functions.
+ */
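+/* A rough sketch of what the guts macros mean in the floating-point build
+   (the fixed-point build adds scaling and rounding; the header holds the
+   authoritative definitions):
+     C_MUL(m,a,b)              m = a*b  (complex multiply)
+     C_ADD / C_SUB / C_ADDTO   component-wise add / subtract / add-in-place
+     C_MULBYSCALAR(c,s)        c *= s   (real scalar)
+     S_MUL(a,b)                plain real multiply
+     HALF_OF(x)                x/2
+     C_FIXDIV(c,n)             no-op in floating point (downscales in fixed point)
+ */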
+
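+/* Radix-2 butterfly: for each of the m pairs (Fout[k], Fout[k+m]), multiply
+   the second element by its twiddle factor and replace the pair with the
+   sum and the difference. */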
+static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride,
+ const kiss_fft_cfg st, int m) {
+ kiss_fft_cpx *Fout2;
+ kiss_fft_cpx *tw1 = st->twiddles;
+ kiss_fft_cpx t;
+ Fout2 = Fout + m;
+ do {
+ C_FIXDIV(*Fout, 2);
+ C_FIXDIV(*Fout2, 2);
+
+ C_MUL(t, *Fout2, *tw1);
+ tw1 += fstride;
+ C_SUB(*Fout2, *Fout, t);
+ C_ADDTO(*Fout, t);
+ ++Fout2;
+ ++Fout;
+ } while (--m);
+}
+
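+/* Radix-4 butterfly: recombines four length-m sub-DFTs.  The sign of the
+   +/-j rotation applied to Fout[m] and Fout[m3] depends on the transform
+   direction, hence the st->inverse branch below. */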
+static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride,
+ const kiss_fft_cfg st, const size_t m) {
+ kiss_fft_cpx *tw1, *tw2, *tw3;
+ kiss_fft_cpx scratch[6];
+ size_t k = m;
+ const size_t m2 = 2 * m;
+ const size_t m3 = 3 * m;
+
+ tw3 = tw2 = tw1 = st->twiddles;
+
+ do {
+ C_FIXDIV(*Fout, 4);
+ C_FIXDIV(Fout[m], 4);
+ C_FIXDIV(Fout[m2], 4);
+ C_FIXDIV(Fout[m3], 4);
+
+ C_MUL(scratch[0], Fout[m], *tw1);
+ C_MUL(scratch[1], Fout[m2], *tw2);
+ C_MUL(scratch[2], Fout[m3], *tw3);
+
+ C_SUB(scratch[5], *Fout, scratch[1]);
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD(scratch[3], scratch[0], scratch[2]);
+ C_SUB(scratch[4], scratch[0], scratch[2]);
+ C_SUB(Fout[m2], *Fout, scratch[3]);
+ tw1 += fstride;
+ tw2 += fstride * 2;
+ tw3 += fstride * 3;
+ C_ADDTO(*Fout, scratch[3]);
+
+ if (st->inverse) {
+ Fout[m].r = scratch[5].r - scratch[4].i;
+ Fout[m].i = scratch[5].i + scratch[4].r;
+ Fout[m3].r = scratch[5].r + scratch[4].i;
+ Fout[m3].i = scratch[5].i - scratch[4].r;
+ } else {
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ }
+ ++Fout;
+ } while (--k);
+}
+
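+/* Radix-3 butterfly.  epi3 is the twiddle exp(-/+ j*2*pi/3); only its
+   imaginary part (-/+ sin(2*pi/3)) is read below, while its real part
+   (-1/2) is applied implicitly via HALF_OF. */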
+static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
+ const kiss_fft_cfg st, size_t m) {
+ size_t k = m;
+ const size_t m2 = 2 * m;
+ kiss_fft_cpx *tw1, *tw2;
+ kiss_fft_cpx scratch[5];
+ kiss_fft_cpx epi3;
+ epi3 = st->twiddles[fstride * m];
+
+ tw1 = tw2 = st->twiddles;
+
+ do {
+ C_FIXDIV(*Fout, 3);
+ C_FIXDIV(Fout[m], 3);
+ C_FIXDIV(Fout[m2], 3);
+
+ C_MUL(scratch[1], Fout[m], *tw1);
+ C_MUL(scratch[2], Fout[m2], *tw2);
+
+ C_ADD(scratch[3], scratch[1], scratch[2]);
+ C_SUB(scratch[0], scratch[1], scratch[2]);
+ tw1 += fstride;
+ tw2 += fstride * 2;
+
+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+ C_MULBYSCALAR(scratch[0], epi3.i);
+
+ C_ADDTO(*Fout, scratch[3]);
+
+ Fout[m2].r = Fout[m].r + scratch[0].i;
+ Fout[m2].i = Fout[m].i - scratch[0].r;
+
+ Fout[m].r -= scratch[0].i;
+ Fout[m].i += scratch[0].r;
+
+ ++Fout;
+ } while (--k);
+}
+
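+/* Radix-5 butterfly.  ya and yb are the twiddles exp(-/+ j*2*pi/5) and
+   exp(-/+ j*4*pi/5); outputs 1..4 are built from the symmetric and
+   antisymmetric combinations of the twiddled inputs (scratch[7..10]). */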
+static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
+ const kiss_fft_cfg st, int m) {
+ kiss_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
+ int u;
+ kiss_fft_cpx scratch[13];
+ kiss_fft_cpx *twiddles = st->twiddles;
+ kiss_fft_cpx *tw;
+ kiss_fft_cpx ya, yb;
+ ya = twiddles[fstride * m];
+ yb = twiddles[fstride * 2 * m];
+
+ Fout0 = Fout;
+ Fout1 = Fout0 + m;
+ Fout2 = Fout0 + 2 * m;
+ Fout3 = Fout0 + 3 * m;
+ Fout4 = Fout0 + 4 * m;
+
+ tw = st->twiddles;
+ for (u = 0; u < m; ++u) {
+ C_FIXDIV(*Fout0, 5);
+ C_FIXDIV(*Fout1, 5);
+ C_FIXDIV(*Fout2, 5);
+ C_FIXDIV(*Fout3, 5);
+ C_FIXDIV(*Fout4, 5);
+ scratch[0] = *Fout0;
+
+ C_MUL(scratch[1], *Fout1, tw[u * fstride]);
+ C_MUL(scratch[2], *Fout2, tw[2 * u * fstride]);
+ C_MUL(scratch[3], *Fout3, tw[3 * u * fstride]);
+ C_MUL(scratch[4], *Fout4, tw[4 * u * fstride]);
+
+ C_ADD(scratch[7], scratch[1], scratch[4]);
+ C_SUB(scratch[10], scratch[1], scratch[4]);
+ C_ADD(scratch[8], scratch[2], scratch[3]);
+ C_SUB(scratch[9], scratch[2], scratch[3]);
+
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
+
+ scratch[5].r =
+ scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
+ scratch[5].i =
+ scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
+
+ scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
+ scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);
+
+ C_SUB(*Fout1, scratch[5], scratch[6]);
+ C_ADD(*Fout4, scratch[5], scratch[6]);
+
+ scratch[11].r =
+ scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
+ scratch[11].i =
+ scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
+ scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
+ scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);
+
+ C_ADD(*Fout2, scratch[11], scratch[12]);
+ C_SUB(*Fout3, scratch[11], scratch[12]);
+
+ ++Fout0;
+ ++Fout1;
+ ++Fout2;
+ ++Fout3;
+ ++Fout4;
+ }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
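+/* (each output of a group is computed as a direct length-p DFT, so this
+   path costs O(p*p) work per group of p outputs; it is only reached for
+   prime radices other than 2, 3 and 5) */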
+static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride,
+ const kiss_fft_cfg st, int m, int p) {
+ int u, k, q1, q;
+ kiss_fft_cpx *twiddles = st->twiddles;
+ kiss_fft_cpx t;
+ int Norig = st->nfft;
+
+ kiss_fft_cpx *scratch =
+ (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p);
+
+ for (u = 0; u < m; ++u) {
+ k = u;
+ for (q1 = 0; q1 < p; ++q1) {
+ scratch[q1] = Fout[k];
+ C_FIXDIV(scratch[q1], p);
+ k += m;
+ }
+
+ k = u;
+ for (q1 = 0; q1 < p; ++q1) {
+ int twidx = 0;
+ Fout[k] = scratch[0];
+ for (q = 1; q < p; ++q) {
+ twidx += fstride * k;
+ if (twidx >= Norig) twidx -= Norig;
+ C_MUL(t, scratch[q], twiddles[twidx]);
+ C_ADDTO(Fout[k], t);
+ }
+ k += m;
+ }
+ }
+ KISS_FFT_TMP_FREE(scratch);
+}
+
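+/* Recursive Cooley-Tukey work routine.  The input f is read with stride
+   fstride*in_stride; each call splits the DFT of length p*m into p
+   interleaved sub-DFTs of length m (decimation in time), then recombines
+   them with the matching radix-p butterfly above. */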
+static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
+ const size_t fstride, int in_stride, int *factors,
+ const kiss_fft_cfg st) {
+ kiss_fft_cpx *Fout_beg = Fout;
+ const int p = *factors++; /* the radix */
+ const int m = *factors++; /* stage's fft length/p */
+ const kiss_fft_cpx *Fout_end = Fout + p * m;
+
+#ifdef _OPENMP
+ // use openmp extensions at the
+ // top-level (not recursive)
+ if (fstride == 1 && p <= 5) {
+ int k;
+
+ // execute the p different work units in different threads
+#pragma omp parallel for
+ for (k = 0; k < p; ++k)
+ kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride,
+ factors, st);
+ // all threads have joined by this point
+
+ switch (p) {
+ case 2:
+ kf_bfly2(Fout, fstride, st, m);
+ break;
+ case 3:
+ kf_bfly3(Fout, fstride, st, m);
+ break;
+ case 4:
+ kf_bfly4(Fout, fstride, st, m);
+ break;
+ case 5:
+ kf_bfly5(Fout, fstride, st, m);
+ break;
+ default:
+ kf_bfly_generic(Fout, fstride, st, m, p);
+ break;
+ }
+ return;
+ }
+#endif
+
+ if (m == 1) {
+ do {
+ *Fout = *f;
+ f += fstride * in_stride;
+ } while (++Fout != Fout_end);
+ } else {
+ do {
+ // recursive call:
+ // DFT of size m*p performed by doing
+ // p instances of smaller DFTs of size m,
+ // each one takes a decimated version of the input
+ kf_work(Fout, f, fstride * p, in_stride, factors, st);
+ f += fstride * in_stride;
+ } while ((Fout += m) != Fout_end);
+ }
+
+ Fout = Fout_beg;
+
+ // recombine the p smaller DFTs
+ switch (p) {
+ case 2:
+ kf_bfly2(Fout, fstride, st, m);
+ break;
+ case 3:
+ kf_bfly3(Fout, fstride, st, m);
+ break;
+ case 4:
+ kf_bfly4(Fout, fstride, st, m);
+ break;
+ case 5:
+ kf_bfly5(Fout, fstride, st, m);
+ break;
+ default:
+ kf_bfly_generic(Fout, fstride, st, m, p);
+ break;
+ }
+}
+
+/* facbuf is populated by p1,m1,p2,m2, ...
+ where
+ p[i] * m[i] = m[i-1]
+ m0 = n */
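+/* e.g. n=1000 yields facbuf = {4,250, 2,125, 5,25, 5,5, 5,1}:
+   1000 = 4*250, 250 = 2*125, 125 = 5*25, 25 = 5*5, 5 = 5*1 */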
+static void kf_factor(int n, int *facbuf) {
+ int p = 4;
+ double floor_sqrt;
+  floor_sqrt = floor(sqrt((double)n)); /* double precision, exact for large n */
+
+  /* factor out powers of 4, powers of 2, then any remaining primes */
+ do {
+ while (n % p) {
+ switch (p) {
+ case 4:
+ p = 2;
+ break;
+ case 2:
+ p = 3;
+ break;
+ default:
+ p += 2;
+ break;
+ }
+ if (p > floor_sqrt) p = n; /* no more factors, skip to end */
+ }
+ n /= p;
+ *facbuf++ = p;
+ *facbuf++ = n;
+ } while (n > 1);
+}
+
+/*
+ * User-callable function to allocate all necessary storage space for the FFT.
+ *
+ * The return value is a contiguous block of memory allocated with malloc; as
+ * such, it can be freed with free() rather than a kiss_fft-specific function.
+ */
+kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem,
+ size_t *lenmem) {
+ kiss_fft_cfg st = NULL;
+ size_t memneeded = sizeof(struct kiss_fft_state) +
+ sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/
+
+ if (lenmem == NULL) {
+ st = (kiss_fft_cfg)KISS_FFT_MALLOC(memneeded);
+ } else {
+ if (mem != NULL && *lenmem >= memneeded) st = (kiss_fft_cfg)mem;
+ *lenmem = memneeded;
+ }
+ if (st) {
+ int i;
+ st->nfft = nfft;
+ st->inverse = inverse_fft;
+
+ for (i = 0; i < nfft; ++i) {
+ const double pi =
+ 3.141592653589793238462643383279502884197169399375105820974944;
+ double phase = -2 * pi * i / nfft;
+ if (st->inverse) phase *= -1;
+ kf_cexp(st->twiddles + i, phase);
+ }
+
+ kf_factor(nfft, st->factors);
+ }
+ return st;
+}
+
+void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout, int in_stride) {
+ if (fin == fout) {
+    // NOTE: this is not really an in-place FFT algorithm.
+    // It just performs an out-of-place FFT into a temp buffer
+    // and then copies the result back into fout.
+ kiss_fft_cpx *tmpbuf =
+ (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft);
+ kf_work(tmpbuf, fin, 1, in_stride, st->factors, st);
+ memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft);
+ KISS_FFT_TMP_FREE(tmpbuf);
+ } else {
+ kf_work(fout, fin, 1, in_stride, st->factors, st);
+ }
+}
+
+void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) {
+ kiss_fft_stride(cfg, fin, fout, 1);
+}
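+
+/* A minimal usage sketch (caller-side code, not part of this file; error
+   handling omitted).  The cfg is a single malloc'd block, so plain free()
+   releases it, as noted above kiss_fft_alloc():
+
+     kiss_fft_cpx in[1024], out[1024];
+     kiss_fft_cfg cfg = kiss_fft_alloc(1024, 0, NULL, NULL);
+     // ... fill in[] ...
+     kiss_fft(cfg, in, out);
+     free(cfg);
+     kiss_fft_cleanup();
+ */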
+
+void kiss_fft_cleanup(void) {
+ // nothing needed any more
+}
+
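+/* e.g. 1000 (=2^3*5^3) is returned unchanged, while 1021 (prime) is bumped
+   up to 1024 (=2^10) */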
+int kiss_fft_next_fast_size(int n) {
+ while (1) {
+ int m = n;
+ while ((m % 2) == 0) m /= 2;
+ while ((m % 3) == 0) m /= 3;
+ while ((m % 5) == 0) m /= 5;
+ if (m <= 1)
+ break; /* n is completely factorable by twos, threes, and fives */
+ n++;
+ }
+ return n;
+}