fftpack.cpp 29.8 KB
Newer Older
1
2
3
4
5
/*
 * This file contains a Fortran to C translation of the 1D transformations
 * based on the original FFTPACK, written by paul n swarztrauber
 * at the national center for atmospheric research and available
 * at www.netlib.org. FFTPACK is in the public domain.
6
7
8
9
10
11
 *
 * Higher-dimension transforms copyright Erik Lindahl, 2008-2009.
 * Just as FFTPACK, this file may be redistributed freely, and can be
 * considered to be in the public domain.
 *
 * Any errors in this (threadsafe, but not threaded) C version
12
 * are due to the f2c translator, or hacks by Erik Lindahl.
13
 *
14
15
 * Copyright (c) 2009, Erik Lindahl
 * All rights reserved.
Rossen Apostolov's avatar
Rossen Apostolov committed
16
17
18
 * Contact: lindahl@cbr.su.se
 * Center for Biomembrane Research
 * Stockholm University, Sweden
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
 * with the distribution.
 * Neither the name of the author/university nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
42
43
44
45
46
47
48
49
50
51
52
53
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>


#include "fftpack.h"


54
/** Contents of the FFTPACK fft datatype.
55
 *
56
 *  FFTPACK only does 1d transforms, so we use a pointers to another fft for
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
 *  the transform in the next dimension.
 * Thus, a 3d-structure contains a pointer to a 2d one, which in turns contains
 * a pointer to a 1d. The 1d structure has next==NULL.
 */
struct fftpack
{
    int                    ndim;     /**< Dimensions, including our subdimensions.  */
    int                    n;        /**< Number of points in this dimension.       */
    int                    ifac[15]; /**< 15 bytes needed for cfft and rfft         */
    struct fftpack *       next;     /**< Pointer to next dimension, or NULL.       */
    RealOpenMM *               work;     /**< 1st 4n reserved for cfft, 1st 2n for rfft */
};








77
78
79
static void
fftpack_passf2(int         ido,
               int         l1,
80
81
82
83
84
85
86
               RealOpenMM     cc[],
               RealOpenMM     ch[],
               RealOpenMM     wa1[],
               int         isign)
{
    int i, k, ah, ac;
    RealOpenMM ti2, tr2;
87

88
89
    if (ido <= 2)
    {
90
        for (k=0; k<l1; k++)
91
92
93
94
95
96
97
98
        {
            ah = k*ido;
            ac = 2*k*ido;
            ch[ah]              = cc[ac]   + cc[ac + ido];
            ch[ah + ido*l1]     = cc[ac]   - cc[ac + ido];
            ch[ah+1]            = cc[ac+1] + cc[ac + ido + 1];
            ch[ah + ido*l1 + 1] = cc[ac+1] - cc[ac + ido + 1];
        }
99
    }
100
101
    else
    {
102
        for (k=0; k<l1; k++)
103
        {
104
            for (i=0; i<ido-1; i+=2)
105
106
107
108
109
110
111
112
113
114
115
116
            {
                ah              = i + k*ido;
                ac              = i + 2*k*ido;
                ch[ah]          = cc[ac] + cc[ac + ido];
                tr2             = cc[ac] - cc[ac + ido];
                ch[ah+1]        = cc[ac+1] + cc[ac + 1 + ido];
                ti2             = cc[ac+1] - cc[ac + 1 + ido];
                ch[ah+l1*ido+1] = wa1[i]*ti2 + isign*wa1[i+1]*tr2;
                ch[ah+l1*ido]   = wa1[i]*tr2 - isign*wa1[i+1]*ti2;
            }
        }
    }
117
}
118
119
120



121
static void
122
fftpack_passf3(int         ido,
123
               int         l1,
124
125
               RealOpenMM     cc[],
               RealOpenMM     ch[],
126
               RealOpenMM     wa1[],
127
128
129
130
131
               RealOpenMM     wa2[],
               int         isign)
{
    const RealOpenMM taur = -0.5;
    const RealOpenMM taui = 0.866025403784439;
132

133
134
    int i, k, ac, ah;
    RealOpenMM ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
135
136

    if (ido == 2)
137
    {
138
        for (k=1; k<=l1; k++)
139
140
141
142
143
144
        {
            ac = (3*k - 2)*ido;
            tr2 = cc[ac] + cc[ac + ido];
            cr2 = cc[ac - ido] + taur*tr2;
            ah = (k - 1)*ido;
            ch[ah] = cc[ac - ido] + tr2;
145

146
147
148
            ti2 = cc[ac + 1] + cc[ac + ido + 1];
            ci2 = cc[ac - ido + 1] + taur*ti2;
            ch[ah + 1] = cc[ac - ido + 1] + ti2;
149

150
151
152
153
154
155
156
            cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
            ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
            ch[ah + l1*ido] = cr2 - ci3;
            ch[ah + 2*l1*ido] = cr2 + ci3;
            ch[ah + l1*ido + 1] = ci2 + cr3;
            ch[ah + 2*l1*ido + 1] = ci2 - cr3;
        }
157
    }
158
159
    else
    {
160
        for (k=1; k<=l1; k++)
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
        {
            for (i=0; i<ido-1; i+=2)
            {
                ac = i + (3*k - 2)*ido;
                tr2 = cc[ac] + cc[ac + ido];
                cr2 = cc[ac - ido] + taur*tr2;
                ah = i + (k-1)*ido;
                ch[ah] = cc[ac - ido] + tr2;
                ti2 = cc[ac + 1] + cc[ac + ido + 1];
                ci2 = cc[ac - ido + 1] + taur*ti2;
                ch[ah + 1] = cc[ac - ido + 1] + ti2;
                cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
                ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
                dr2 = cr2 - ci3;
                dr3 = cr2 + ci3;
                di2 = ci2 + cr3;
                di3 = ci2 - cr3;
                ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
                ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
                ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
                ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
            }
        }
    }
185
}
186
187


188
189
static void
fftpack_passf4(int          ido,
190
               int          l1,
191
               RealOpenMM      cc[],
192
193
               RealOpenMM      ch[],
               RealOpenMM      wa1[],
194
               RealOpenMM      wa2[],
195
196
197
198
199
               RealOpenMM      wa3[],
               int          isign)
{
    int i, k, ac, ah;
    RealOpenMM ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
200
201

    if (ido == 2)
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
    {
        for (k=0; k<l1; k++)
        {
            ac = 4*k*ido + 1;
            ti1 = cc[ac] - cc[ac + 2*ido];
            ti2 = cc[ac] + cc[ac + 2*ido];
            tr4 = cc[ac + 3*ido] - cc[ac + ido];
            ti3 = cc[ac + ido] + cc[ac + 3*ido];
            tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
            tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
            ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
            tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
            ah = k*ido;
            ch[ah] = tr2 + tr3;
            ch[ah + 2*l1*ido] = tr2 - tr3;
            ch[ah + 1] = ti2 + ti3;
            ch[ah + 2*l1*ido + 1] = ti2 - ti3;
            ch[ah + l1*ido] = tr1 + isign*tr4;
            ch[ah + 3*l1*ido] = tr1 - isign*tr4;
            ch[ah + l1*ido + 1] = ti1 + isign*ti4;
            ch[ah + 3*l1*ido + 1] = ti1 - isign*ti4;
        }
224
    }
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
    else
    {
        for (k=0; k<l1; k++)
        {
            for (i=0; i<ido-1; i+=2)
            {
                ac = i + 1 + 4*k*ido;
                ti1 = cc[ac] - cc[ac + 2*ido];
                ti2 = cc[ac] + cc[ac + 2*ido];
                ti3 = cc[ac + ido] + cc[ac + 3*ido];
                tr4 = cc[ac + 3*ido] - cc[ac + ido];
                tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
                tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
                ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
                tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
                ah = i + k*ido;
                ch[ah] = tr2 + tr3;
                cr3 = tr2 - tr3;
                ch[ah + 1] = ti2 + ti3;
                ci3 = ti2 - ti3;
                cr2 = tr1 + isign*tr4;
                cr4 = tr1 - isign*tr4;
                ci2 = ti1 + isign*ti4;
                ci4 = ti1 - isign*ti4;
                ch[ah + l1*ido] = wa1[i]*cr2 - isign*wa1[i + 1]*ci2;
                ch[ah + l1*ido + 1] = wa1[i]*ci2 + isign*wa1[i + 1]*cr2;
                ch[ah + 2*l1*ido] = wa2[i]*cr3 - isign*wa2[i + 1]*ci3;
                ch[ah + 2*l1*ido + 1] = wa2[i]*ci3 + isign*wa2[i + 1]*cr3;
                ch[ah + 3*l1*ido] = wa3[i]*cr4 -isign*wa3[i + 1]*ci4;
                ch[ah + 3*l1*ido + 1] = wa3[i]*ci4 + isign*wa3[i + 1]*cr4;
            }
        }
    }
258
}
259
260


261
static void
262
fftpack_passf5(int          ido,
263
               int          l1,
264
265
               RealOpenMM      cc[],
               RealOpenMM      ch[],
266
               RealOpenMM      wa1[],
267
               RealOpenMM      wa2[],
268
               RealOpenMM      wa3[],
269
270
271
272
273
274
275
               RealOpenMM      wa4[],
               int          isign)
{
    const RealOpenMM tr11 = 0.309016994374947;
    const RealOpenMM ti11 = 0.951056516295154;
    const RealOpenMM tr12 = -0.809016994374947;
    const RealOpenMM ti12 = 0.587785252292473;
276

277
278
279
    int i, k, ac, ah;
    RealOpenMM ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
        ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
280
281

    if (ido == 2)
282
    {
283
        for (k = 1; k <= l1; ++k)
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
        {
            ac = (5*k - 4)*ido + 1;
            ti5 = cc[ac] - cc[ac + 3*ido];
            ti2 = cc[ac] + cc[ac + 3*ido];
            ti4 = cc[ac + ido] - cc[ac + 2*ido];
            ti3 = cc[ac + ido] + cc[ac + 2*ido];
            tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
            tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
            tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
            tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
            ah = (k - 1)*ido;
            ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
            ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
            cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
            ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
            cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
            ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
            cr5 = isign*(ti11*tr5 + ti12*tr4);
            ci5 = isign*(ti11*ti5 + ti12*ti4);
            cr4 = isign*(ti12*tr5 - ti11*tr4);
            ci4 = isign*(ti12*ti5 - ti11*ti4);
            ch[ah + l1*ido] = cr2 - ci5;
            ch[ah + 4*l1*ido] = cr2 + ci5;
            ch[ah + l1*ido + 1] = ci2 + cr5;
            ch[ah + 2*l1*ido + 1] = ci3 + cr4;
            ch[ah + 2*l1*ido] = cr3 - ci4;
            ch[ah + 3*l1*ido] = cr3 + ci4;
            ch[ah + 3*l1*ido + 1] = ci3 - cr4;
            ch[ah + 4*l1*ido + 1] = ci2 - cr5;
        }
314
    }
315
316
    else
    {
317
        for (k=1; k<=l1; k++)
318
        {
319
            for (i=0; i<ido-1; i+=2)
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
            {
                ac = i + 1 + (k*5 - 4)*ido;
                ti5 = cc[ac] - cc[ac + 3*ido];
                ti2 = cc[ac] + cc[ac + 3*ido];
                ti4 = cc[ac + ido] - cc[ac + 2*ido];
                ti3 = cc[ac + ido] + cc[ac + 2*ido];
                tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
                tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
                tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
                tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
                ah = i + (k - 1)*ido;
                ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
                ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
                cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
                ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
                cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
                ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
                cr5 = isign*(ti11*tr5 + ti12*tr4);
                ci5 = isign*(ti11*ti5 + ti12*ti4);
                cr4 = isign*(ti12*tr5 - ti11*tr4);
                ci4 = isign*(ti12*ti5 - ti11*ti4);
                dr3 = cr3 - ci4;
                dr4 = cr3 + ci4;
                di3 = ci3 + cr4;
                di4 = ci3 - cr4;
                dr5 = cr2 + ci5;
                dr2 = cr2 - ci5;
                di5 = ci2 - cr5;
                di2 = ci2 + cr5;
                ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
                ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
                ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
                ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
                ch[ah + 3*l1*ido] = wa3[i]*dr4 - isign*wa3[i+1]*di4;
                ch[ah + 3*l1*ido + 1] = wa3[i]*di4 + isign*wa3[i+1]*dr4;
                ch[ah + 4*l1*ido] = wa4[i]*dr5 - isign*wa4[i+1]*di5;
                ch[ah + 4*l1*ido + 1] = wa4[i]*di5 + isign*wa4[i+1]*dr5;
            }
        }
    }
360
}
361
362


363
364
static void
fftpack_passf(int *        nac,
365
              int          ido,
366
              int          ip,
367
368
369
370
371
372
373
374
375
              int          l1,
              int          idl1,
              RealOpenMM      cc[],
              RealOpenMM      ch[],
              RealOpenMM      wa[],
              int          isign)
{
    int idij, idlj, idot, ipph, i, j, k, l, jc, lc, ik, nt, idj, idl, inc,idp;
    RealOpenMM wai, war;
376

377
378
379
380
    idot = ido / 2;
    nt = ip*idl1;
    ipph = (ip + 1) / 2;
    idp = ip*ido;
381
    if (ido >= l1)
382
383
384
385
    {
        for (j=1; j<ipph; j++)
        {
            jc = ip - j;
386
            for (k=0; k<l1; k++)
387
            {
388
                for (i=0; i<ido; i++)
389
390
391
392
393
394
395
396
397
                {
                    ch[i + (k + j*l1)*ido]  = cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
                    ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
                }
            }
        }
        for (k=0; k<l1; k++)
            for (i=0; i<ido; i++)
                ch[i + k*ido] = cc[i + k*ip*ido];
398
    }
399
400
    else
    {
401
        for (j=1; j<ipph; j++)
402
403
        {
            jc = ip - j;
404
            for (i=0; i<ido; i++)
405
            {
406
                for (k=0; k<l1; k++)
407
408
409
410
411
412
413
414
415
416
                {
                    ch[i + (k + j*l1)*ido] =  cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
                    ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
                }
            }
        }
        for (i=0; i<ido; i++)
            for (k=0; k<l1; k++)
                ch[i + k*ido] = cc[i + k*ip*ido];
    }
417

418
419
    idl = 2 - ido;
    inc = 0;
420
    for (l=1; l<ipph; l++)
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
    {
        lc = ip - l;
        idl += ido;
        for (ik=0; ik<idl1; ik++)
        {
            cc[ik + l*idl1] = ch[ik] + wa[idl - 2]*ch[ik + idl1];
            cc[ik + lc*idl1] = isign*wa[idl-1]*ch[ik + (ip-1)*idl1];
        }
        idlj = idl;
        inc += ido;
        for (j=2; j<ipph; j++)
        {
            jc = ip - j;
            idlj += inc;
            if (idlj > idp) idlj -= idp;
            war = wa[idlj - 2];
            wai = wa[idlj-1];
            for (ik=0; ik<idl1; ik++)
            {
                cc[ik + l*idl1] += war*ch[ik + j*idl1];
                cc[ik + lc*idl1] += isign*wai*ch[ik + jc*idl1];
            }
        }
    }
    for (j=1; j<ipph; j++)
        for (ik=0; ik<idl1; ik++)
            ch[ik] += ch[ik + j*idl1];
448
    for (j=1; j<ipph; j++)
449
450
    {
        jc = ip - j;
451
        for (ik=1; ik<idl1; ik+=2)
452
453
454
455
456
457
458
459
        {
            ch[ik - 1 + j*idl1] = cc[ik - 1 + j*idl1] - cc[ik + jc*idl1];
            ch[ik - 1 + jc*idl1] = cc[ik - 1 + j*idl1] + cc[ik + jc*idl1];
            ch[ik + j*idl1] = cc[ik + j*idl1] + cc[ik - 1 + jc*idl1];
            ch[ik + jc*idl1] = cc[ik + j*idl1] - cc[ik - 1 + jc*idl1];
        }
    }
    *nac = 1;
460
    if (ido == 2)
461
462
463
464
465
466
467
468
        return;
    *nac = 0;
    for (ik=0; ik<idl1; ik++)
    {
        cc[ik] = ch[ik];
    }
    for (j=1; j<ip; j++)
    {
469
        for (k=0; k<l1; k++)
470
471
472
473
474
        {
            cc[(k + j*l1)*ido + 0] = ch[(k + j*l1)*ido + 0];
            cc[(k + j*l1)*ido + 1] = ch[(k + j*l1)*ido + 1];
        }
    }
475
    if (idot <= l1)
476
477
478
479
480
    {
        idij = 0;
        for (j=1; j<ip; j++)
        {
            idij += 2;
481
            for (i=3; i<ido; i+=2)
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
            {
                idij += 2;
                for (k=0; k<l1; k++)
                {
                    cc[i - 1 + (k + j*l1)*ido] =
                    wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
                    isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
                    cc[i + (k + j*l1)*ido] =
                        wa[idij - 2]*ch[i + (k + j*l1)*ido] +
                        isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
                }
            }
        }
    }
    else
    {
        idj = 2 - ido;
499
        for (j=1; j<ip; j++)
500
501
        {
            idj += ido;
502
            for (k = 0; k < l1; k++)
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
            {
                idij = idj;
                for (i=3; i<ido; i+=2)
                {
                    idij += 2;
                    cc[i - 1 + (k + j*l1)*ido] =
                        wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
                        isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
                    cc[i + (k + j*l1)*ido] =
                        wa[idij - 2]*ch[i + (k + j*l1)*ido] +
                        isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
                }
            }
        }
    }
518
}
519
520
521
522
523





524
static void
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
fftpack_cfftf1(int          n,
               RealOpenMM      c[],
               RealOpenMM      ch[],
               RealOpenMM      wa[],
               int          ifac[15],
               int          isign)
{
    int idot, i;
    int k1, l1, l2;
    int na, nf, ip, iw, ix2, ix3, ix4, nac, ido, idl1;
    RealOpenMM *cinput, *coutput;
    nf = ifac[1];
    na = 0;
    l1 = 1;
    iw = 0;
540
541

    for (k1=2; k1<=nf+1; k1++)
542
543
544
545
546
547
    {
        ip = ifac[k1];
        l2 = ip*l1;
        ido = n / l2;
        idot = ido + ido;
        idl1 = idot*l1;
548
        if (na)
549
550
551
552
        {
            cinput = ch;
            coutput = c;
        }
553
        else
554
555
556
557
        {
            cinput = c;
            coutput = ch;
        }
558
        switch (ip)
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
        {
            case 4:
                ix2 = iw + idot;
                ix3 = ix2 + idot;
                fftpack_passf4(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], isign);
                na = !na;
                break;
            case 2:
                fftpack_passf2(idot, l1, cinput, coutput, &wa[iw], isign);
                na = !na;
                break;
            case 3:
                ix2 = iw + idot;
                fftpack_passf3(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], isign);
                na = !na;
                break;
            case 5:
                ix2 = iw + idot;
                ix3 = ix2 + idot;
                ix4 = ix3 + idot;
                fftpack_passf5(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
                na = !na;
                break;
            default:
                fftpack_passf(&nac, idot, ip, l1, idl1, cinput, coutput, &wa[iw], isign);
                if (nac != 0) na = !na;
        }
        l1 = l2;
        iw += (ip - 1)*idot;
    }
589
    if (na == 0)
590
        return;
591
    for (i=0; i<2*n; i++)
592
593
594
595
596
597
        c[i] = ch[i];
}




598
static void
599
600
601
602
603
604
605
606
607
608
609
610
fftpack_factorize(int    n,
                  int    ifac[15])
{
    static const int ntryh[4] = { 3,4,2,5 };
    int ntry=3, i, j=0, ib, nf=0, nl=n, nq, nr;

startloop:
    if (j < 4)
        ntry = ntryh[j];
    else
        ntry+= 2;
    j++;
611
    do
612
613
614
615
616
617
618
    {
        nq = nl / ntry;
        nr = nl - ntry*nq;
        if (nr != 0) goto startloop;
        nf++;
        ifac[nf + 1] = ntry;
        nl = nq;
619
        if (ntry == 2 && nf != 1)
620
        {
621
            for (i=2; i<=nf; i++)
622
623
624
625
626
627
            {
                ib = nf - i + 2;
                ifac[ib + 1] = ifac[ib];
            }
            ifac[2] = 2;
        }
628
    }
629
630
631
632
633
634
635
    while (nl != 1);
    ifac[0] = n;
    ifac[1] = nf;
}


static void
636
637
fftpack_cffti1(int          n,
               RealOpenMM      wa[],
638
639
640
641
642
643
644
645
646
647
648
649
650
651
               int          ifac[15])
{
    const RealOpenMM twopi = 6.28318530717959;
    RealOpenMM arg, argh, argld, fi;
    int idot, i, j;
    int i1, k1, l1, l2;
    int ld, ii, nf, ip;
    int ido, ipm;

    fftpack_factorize(n,ifac);
    nf = ifac[1];
    argh = twopi/(RealOpenMM)n;
    i = 1;
    l1 = 1;
652
    for (k1=1; k1<=nf; k1++)
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
    {
        ip = ifac[k1+1];
        ld = 0;
        l2 = l1*ip;
        ido = n / l2;
        idot = ido + ido + 2;
        ipm = ip - 1;
        for (j=1; j<=ipm; j++)
        {
            i1 = i;
            wa[i-1] = 1;
            wa[i] = 0;
            ld += l1;
            fi = 0;
            argld = ld*argh;
668
            for (ii=4; ii<=idot; ii+=2)
669
670
671
672
673
674
675
            {
                i+= 2;
                fi+= 1;
                arg = fi*argld;
                wa[i-1] = cos(arg);
                wa[i] = sin(arg);
            }
676
            if (ip > 5)
677
678
679
680
681
682
683
            {
                wa[i1-1] = wa[i-1];
                wa[i1] = wa[i];
            }
        }
        l1 = l2;
    }
684
}
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709








static int
fftpack_transpose_2d(t_complex *          in_data,
                     t_complex *          out_data,
                     int                  nx,
                     int                  ny)
{
	t_complex *  src;
	int          i,j;

    if(nx<2 || ny<2)
    {
        if(in_data != out_data)
        {
            memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
        }
        return 0;
    }
710

711
712
713
714
715
716
717
718
719
    if(in_data == out_data)
	{
		src = (t_complex *)malloc(sizeof(t_complex)*nx*ny);
		memcpy(src,in_data,sizeof(t_complex)*nx*ny);
	}
	else
	{
		src = in_data;
	}
720

721
722
723
724
725
726
727
728
	for(i=0;i<nx;i++)
	{
		for(j=0;j<ny;j++)
		{
			out_data[j*nx+i].re = src[i*ny+j].re;
			out_data[j*nx+i].im = src[i*ny+j].im;
		}
	}
729

730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
	if(src != in_data)
	{
		free(src);
	}
	return 0;
}



static int
fftpack_transpose_2d_nelem(t_complex *          in_data,
						   t_complex *          out_data,
						   int                  nx,
						   int                  ny,
                           int                  nelem)
{
	t_complex *   src;
	int           ncopy;
	int           i,j;
749

750
	ncopy = nelem*sizeof(t_complex);
751

752
753
754
755
756
757
758
759
    if(nx<2 || ny<2)
    {
        if(in_data != out_data)
        {
            memcpy(out_data,in_data,nx*ny*ncopy);
        }
        return 0;
    }
760

761
762
763
764
765
766
767
768
769
    if(in_data == out_data)
	{
		src = (t_complex *)malloc(nx*ny*ncopy);
		memcpy(src,in_data,nx*ny*ncopy);
	}
	else
	{
		src = in_data;
	}
770

771
772
773
774
775
776
777
	for(i=0;i<nx;i++)
	{
		for(j=0;j<ny;j++)
		{
			memcpy(out_data + (j*nx+i)*nelem , src + (i*ny+j)*nelem , ncopy);
		}
	}
778

779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
	if(src != in_data)
	{
		free(src);
	}
	return 0;
}






int
fftpack_init_1d(fftpack_t *        pfft,
                int                nx)
{
    fftpack_t    fft;
796

797
798
799
800
801
802
    if(pfft==NULL)
    {
        fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
        return EINVAL;
    }
    *pfft = NULL;
803

804
805
806
    if( (fft = (struct fftpack *)malloc(sizeof(struct fftpack))) == NULL)
    {
        return ENOMEM;
807
808
    }

809
810
    fft->next = NULL;
    fft->n    = nx;
811

812
    /* Need 4*n storage for 1D complex FFT */
813
    if( (fft->work = (RealOpenMM *)malloc(sizeof(RealOpenMM)*(4*nx))) == NULL)
814
815
816
817
818
819
820
    {
        free(fft);
        return ENOMEM;
    }

    if(fft->n>1)
        fftpack_cffti1(nx,fft->work,fft->ifac);
821

822
823
824
825
826
827
828
829
830
831
832
833
834
835
    *pfft = fft;
    return 0;
};




int
fftpack_init_2d(fftpack_t *        pfft,
                int                nx,
                int                ny)
{
    fftpack_t     fft;
    int           rc;
836

837
838
839
840
841
842
843
844
845
846
847
    if(pfft==NULL)
    {
        fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    /* Create the X transform */
    if( (rc = fftpack_init_1d(&fft,nx)) != 0)
    {
        return rc;
848
849
    }

850
851
852
853
854
855
    /* Create Y transform as a link from X */
    if( (rc=fftpack_init_1d(&(fft->next),ny)) != 0)
    {
        free(fft);
        return rc;
    }
856

857
858
859
860
861
862
863
864
865
866
867
868
869
870
    *pfft = fft;
    return 0;
};



int
fftpack_init_3d(fftpack_t *        pfft,
                int                nx,
                int                ny,
                int                nz)
{
    fftpack_t     fft;
    int           rc;
871

872
873
874
875
876
877
878
879
880
881
882
883
    if(pfft==NULL)
    {
        fprintf(stderr,"Fatal error - Invalid FFT opaque type pointer.");
        return EINVAL;
    }
    *pfft = NULL;

    /* Create the X transform */

    if( (fft = (struct fftpack *)malloc(sizeof(struct fftpack))) == NULL)
    {
        return ENOMEM;
884
    }
885
886

    fft->n    = nx;
887

888
889
    /* Need 4*nx storage for 1D complex FFT.
     */
890
    if( (fft->work = (RealOpenMM *)malloc(sizeof(RealOpenMM)*(4*nx))) == NULL)
891
892
893
894
    {
        free(fft);
        return ENOMEM;
    }
895

896
    fftpack_cffti1(nx,fft->work,fft->ifac);
897

898
899
900
901
902
903
    /* Create 2D Y/Z transforms as a link from X */
    if( (rc=fftpack_init_2d(&(fft->next),ny,nz)) != 0)
    {
        free(fft);
        return rc;
    }
904

905
906
907
908
909
910
    *pfft = fft;

	return 0;
};


911
int
912
913
914
915
916
917
918
919
920
921
922
923
924
925
fftpack_exec_1d          (fftpack_t                  fft,
                          enum fftpack_direction     dir,
                          t_complex *                in_data,
                          t_complex *                out_data)
{
    int             i,n;
    RealOpenMM *    p1;
    RealOpenMM *    p2;

    n=fft->n;

    if(n==1)
    {
        p1 = (RealOpenMM *)in_data;
926
        p2 = (RealOpenMM *)out_data;
927
928
929
        p2[0] = p1[0];
        p2[1] = p1[1];
    }
930

931
932
933
934
935
936
937
    /* FFTPACK only does in-place transforms, so emulate out-of-place
     * by copying data to the output array first.
     */
    if( in_data != out_data )
    {
        p1 = (RealOpenMM *)in_data;
        p2 = (RealOpenMM *)out_data;
938

939
940
941
942
943
944
        /* n complex = 2*n RealOpenMM elements */
        for(i=0;i<2*n;i++)
        {
            p2[i] = p1[i];
        }
    }
945

946
947
948
    /* Elements 0   .. 2*n-1 in work are used for ffac values,
     * Elements 2*n .. 4*n-1 are internal FFTPACK work space.
     */
949
950

    if(dir == FFTPACK_FORWARD)
951
952
953
954
955
    {
        fftpack_cfftf1(n,(RealOpenMM *)out_data,fft->work+2*n,fft->work,fft->ifac, -1);
    }
    else if(dir == FFTPACK_BACKWARD)
    {
956
        fftpack_cfftf1(n,(RealOpenMM *)out_data,fft->work+2*n,fft->work,fft->ifac, 1);
957
958
959
960
961
    }
    else
    {
        fprintf(stderr,"FFT plan mismatch - bad plan or direction.");
        return EINVAL;
962
    }
963
964
965
966
967
968
969
970

    return 0;
}





971
int
972
973
974
975
976
977
978
fftpack_exec_2d          (fftpack_t                  fft,
                          enum fftpack_direction     dir,
                          t_complex *                in_data,
                          t_complex *                out_data)
{
    int                i,nx,ny;
    t_complex *    data;
979

980
981
    nx = fft->n;
    ny = fft->next->n;
982

983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
    /* FFTPACK only does in-place transforms, so emulate out-of-place
     * by copying data to the output array first.
     * For 2D there is likely enough data to benefit from memcpy().
     */
    if( in_data != out_data )
    {
        memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
    }

    /* Much easier to do pointer arithmetic when base has the correct type */
    data = (t_complex *)out_data;

    /* y transforms */
    for(i=0;i<nx;i++)
    {
        fftpack_exec_1d(fft->next,dir,data+i*ny,data+i*ny);
    }
1000

1001
1002
    /* Transpose in-place to get data in place for x transform now */
    fftpack_transpose_2d(data,data,nx,ny);
1003

1004
1005
1006
1007
1008
    /* x transforms */
    for(i=0;i<ny;i++)
    {
        fftpack_exec_1d(fft,dir,data+i*nx,data+i*nx);
    }
1009

1010
1011
    /* Transpose in-place to get data back in original order */
    fftpack_transpose_2d(data,data,ny,nx);
1012

1013
1014
1015
1016
1017
1018
1019
    return 0;
}





1020
int
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
fftpack_exec_3d     (fftpack_t                  fft,
                     enum fftpack_direction     dir,
                     t_complex *                in_data,
                     t_complex *                out_data)
{
    int              i,nx,ny,nz,rc;
    t_complex *      data;

    nx=fft->n;
    ny=fft->next->n;
    nz=fft->next->next->n;

    /* FFTPACK only does in-place transforms, so emulate out-of-place
     * by copying data to the output array first.
     * For 3D there is likely enough data to benefit from memcpy().
     */
    if( in_data != out_data )
    {
        memcpy(out_data,in_data,sizeof(t_complex)*nx*ny*nz);
    }
1041

1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
    /* Much easier to do pointer arithmetic when base has the correct type */
    data = (t_complex *)out_data;

    /* Perform z transforms */
    for(i=0;i<nx*ny;i++)
        fftpack_exec_1d(fft->next->next,dir,data+i*nz,data+i*nz);

    /* For each X slice, transpose the y & z dimensions inside the slice */
    for(i=0;i<nx;i++)
    {
        fftpack_transpose_2d(data+i*ny*nz,data+i*ny*nz,ny,nz);
    }

    /* Array is now (nx,nz,ny) - perform y transforms */
    for(i=0;i<nx*nz;i++)
    {
        fftpack_exec_1d(fft->next,dir,data+i*ny,data+i*ny);
    }

    /* Transpose back to (nx,ny,nz) */
    for(i=0;i<nx;i++)
    {
        fftpack_transpose_2d(data+i*ny*nz,data+i*ny*nz,nz,ny);
    }
1066

1067
1068
1069
1070
1071
1072
1073
1074
1075
    /* Transpose entire x & y slices to go from
     * (nx,ny,nz) to (ny,nx,nz).
     */
    rc=fftpack_transpose_2d_nelem(data,data,nx,ny,nz);
    if( rc != 0)
    {
        fprintf(stderr,"Fatal error - cannot transpose X & Y/Z in fftpack_exec_3d().");
        return rc;
    }
1076

1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
    /* Then go from (ny,nx,nz) to (ny,nz,nx) */
    for(i=0;i<ny;i++)
    {
        fftpack_transpose_2d(data+i*nx*nz,data+i*nx*nz,nx,nz);
    }

    /* Perform x transforms */
    for(i=0;i<ny*nz;i++)
    {
        fftpack_exec_1d(fft,dir,data+i*nx,data+i*nx);
    }
1088

1089
1090
1091
1092
1093
    /* Transpose back from (ny,nz,nx) to (ny,nx,nz) */
    for(i=0;i<ny;i++)
    {
        fftpack_transpose_2d(data+i*nz*nx,data+i*nz*nx,nz,nx);
    }
1094
1095

    /* Transpose from (ny,nx,nz) to (nx,ny,nz).
1096
1097
1098
1099
1100
1101
1102
     */
    rc = fftpack_transpose_2d_nelem(data,data,ny,nx,nz);
    if( rc != 0)
    {
        fprintf(stderr,"Fatal error - cannot transpose Y/Z & X in fftpack_exec_3d().");
        return rc;
    }
1103

1104
1105
1106
1107
1108
1109
1110
1111
    return 0;
}



void
fftpack_destroy(fftpack_t      fft)
{
1112
    if(fft != NULL)
1113
1114
1115
1116
1117
1118
1119
1120
1121
    {
        free(fft->work);
        if(fft->next != NULL)
            fftpack_destroy(fft->next);
        free(fft);
    }
}