fft.cl 2.11 KB
Newer Older
1
2
real2 multiplyComplex(real2 c1, real2 c2) {
    return (real2) (c1.x*c2.x-c1.y*c2.y, c1.x*c2.y+c1.y*c2.x);
Peter Eastman's avatar
Peter Eastman committed
3
4
}

5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Load a value from the half-complex grid produces by a real-to-complex transform.
 */
real2 loadComplexValue(__global const real2* restrict in, int x, int y, int z) {
    const int inputZSize = ZSIZE/2+1;
    if (z < inputZSize)
        return in[x*YSIZE*inputZSize+y*inputZSize+z];
    int xp = (x == 0 ? 0 : XSIZE-x);
    int yp = (y == 0 ? 0 : YSIZE-y);
    real2 value = in[xp*YSIZE*inputZSize+yp*inputZSize+(ZSIZE-z)];
    return (real2) (value.x, -value.y);
}

Peter Eastman's avatar
Peter Eastman committed
18
19
20
21
/**
 * Perform a 1D FFT on each row along one axis.
 */

22
__kernel void execFFT(__global const INPUT_TYPE* restrict in, __global OUTPUT_TYPE* restrict out, __local real2* restrict w,
23
        __local real2* restrict data0, __local real2* restrict data1) {
Peter Eastman's avatar
Peter Eastman committed
24
    for (int i = get_local_id(0); i < ZSIZE; i += get_local_size(0))
25
        w[i] = (real2) (cos(-(SIGN)*i*2*M_PI/ZSIZE), sin(-(SIGN)*i*2*M_PI/ZSIZE));
26
    barrier(CLK_LOCAL_MEM_FENCE);
27
    
peastman's avatar
peastman committed
28
29
    for (int baseIndex = get_group_id(0)*BLOCKS_PER_GROUP; baseIndex < XSIZE*YSIZE; baseIndex += get_num_groups(0)*BLOCKS_PER_GROUP) {
        int index = baseIndex+get_local_id(0)/ZSIZE;
Peter Eastman's avatar
Peter Eastman committed
30
31
        int x = index/YSIZE;
        int y = index-x*YSIZE;
32
#if OUTPUT_IS_PACKED
peastman's avatar
peastman committed
33
        if (x < XSIZE/2+1) {
34
#endif
35
#if LOOP_REQUIRED
Peter Eastman's avatar
Peter Eastman committed
36
        for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
37
38
    #if INPUT_IS_REAL
            data0[z] = (real2) (in[x*(YSIZE*ZSIZE)+y*ZSIZE+z], 0);
39
40
    #elif INPUT_IS_PACKED
            data0[z] = loadComplexValue(in, x, y, z);
41
    #else
Peter Eastman's avatar
Peter Eastman committed
42
            data0[z] = in[x*(YSIZE*ZSIZE)+y*ZSIZE+z];
43
    #endif
Peter Eastman's avatar
Peter Eastman committed
44
#else
peastman's avatar
peastman committed
45
        if (index < XSIZE*YSIZE)
46
47
    #if INPUT_IS_REAL
            data0[get_local_id(0)] = (real2) (in[x*(YSIZE*ZSIZE)+y*ZSIZE+get_local_id(0)%ZSIZE], 0);
48
49
    #elif INPUT_IS_PACKED
            data0[get_local_id(0)] = loadComplexValue(in, x, y, get_local_id(0)%ZSIZE);
50
    #else
51
            data0[get_local_id(0)] = in[x*(YSIZE*ZSIZE)+y*ZSIZE+get_local_id(0)%ZSIZE];
52
    #endif
Peter Eastman's avatar
Peter Eastman committed
53
#endif
Peter Eastman's avatar
Peter Eastman committed
54
        barrier(CLK_LOCAL_MEM_FENCE);
peastman's avatar
peastman committed
55
56
57
#if OUTPUT_IS_PACKED
        }
#endif
Peter Eastman's avatar
Peter Eastman committed
58
59
        COMPUTE_FFT
    }
Peter Eastman's avatar
Peter Eastman committed
60
}