/* fft.cl: complex-multiply helper and a kernel that performs a 1D FFT on each row along one axis. */
/**
 * Multiply two complex numbers stored as two-component vectors, where
 * .x holds the real part and .y holds the imaginary part.
 *
 * (a + bi)(c + di) = (ac - bd) + (ad + bc)i
 *
 * @param c1  first factor
 * @param c2  second factor
 * @return the complex product c1*c2
 */
real2 multiplyComplex(real2 c1, real2 c2) {
    return (real2) (c1.x*c2.x-c1.y*c2.y, c1.x*c2.y+c1.y*c2.x);
}

/**
 * Perform a 1D FFT on each row along one axis.
 *
 * The input is indexed as in[x*(YSIZE*ZSIZE) + y*ZSIZE + z], so each (x, y) pair
 * selects one row of ZSIZE consecutive complex values.  There are XSIZE*YSIZE rows;
 * each work-group handles BLOCKS_PER_GROUP of them per pass of the outer loop.
 *
 * XSIZE, YSIZE, ZSIZE, BLOCKS_PER_GROUP, LOOP_REQUIRED and COMPUTE_FFT are macros
 * that must be defined before this kernel is compiled; their definitions are not
 * part of this file section.
 *
 * @param in     input array of complex values
 * @param out    output array; not referenced by the code visible here
 *               (presumably written by COMPUTE_FFT -- confirm against its definition)
 * @param sign   multiplies the angle of the complex exponentials stored in w
 *               (presumably +1 or -1 to select the transform direction -- confirm with the caller)
 * @param w      local scratch array with at least ZSIZE elements; filled with
 *               (cos(t), sin(t)) for t = -sign*i*2*pi/ZSIZE, i = 0..ZSIZE-1
 * @param data0  local scratch array that receives the input rows
 * @param data1  local scratch array; not referenced by the code visible here
 *               (presumably used by COMPUTE_FFT -- confirm against its definition)
 */
__kernel void execFFT(__global const real2* restrict in, __global real2* restrict out, int sign, __local real2* restrict w,
        __local real2* restrict data0, __local real2* restrict data1) {
    // Fill the table of complex exponentials once per work-group, with the
    // work-items sharing the ZSIZE entries between them.
    for (int i = get_local_id(0); i < ZSIZE; i += get_local_size(0))
        w[i] = (real2) (cos(-sign*i*2*M_PI/ZSIZE), sin(-sign*i*2*M_PI/ZSIZE));
    // Make the whole table visible to every work-item before it is used.
    barrier(CLK_LOCAL_MEM_FENCE);
    
    // Each work-group starts at its own batch of rows and advances by the total
    // number of rows covered by all groups in one pass.
    // NOTE: the local names index, x and y may be referenced by COMPUTE_FFT (defined
    // elsewhere), so they should not be renamed without checking that macro.
    for (int baseIndex = get_group_id(0)*BLOCKS_PER_GROUP; baseIndex < XSIZE*YSIZE; baseIndex += get_num_groups(0)*BLOCKS_PER_GROUP) {
        // Row assigned to this work-item within the current batch.
        int index = baseIndex+get_local_id(0)/ZSIZE;
        // Split the flat row index into its x and y coordinates.
        int x = index/YSIZE;
        int y = index-x*YSIZE;
#if LOOP_REQUIRED
        // Each work-item copies several elements of the row, striding by the
        // work-group size (presumably enabled when a row has more elements than
        // the group has work-items -- confirm where LOOP_REQUIRED is defined).
        for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
            data0[z] = in[x*(YSIZE*ZSIZE)+y*ZSIZE+z];
#else
        // Each work-item copies exactly one element.  The bounds check skips
        // work-items whose row lies past the last one, which can happen in the
        // final batch because index may exceed baseIndex.
        if (index < XSIZE*YSIZE)
            data0[get_local_id(0)] = in[x*(YSIZE*ZSIZE)+y*ZSIZE+get_local_id(0)%ZSIZE];
#endif
        // All loads into data0 must complete before the transform reads them.
        barrier(CLK_LOCAL_MEM_FENCE);
        COMPUTE_FFT
    }
}