Commit 63722d0a authored by peastman
Browse files

Bug fixes to real-to-complex FFT

parent ad62d81e
...@@ -313,15 +313,23 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa ...@@ -313,15 +313,23 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
bool outputIsPacked = (inputIsReal && axis == 2 && forward); bool outputIsPacked = (inputIsReal && axis == 2 && forward);
string outputSuffix = (outputIsReal ? ".x" : ""); string outputSuffix = (outputIsReal ? ".x" : "");
if (loopRequired) { if (loopRequired) {
if (outputIsPacked)
source<<"if (x < XSIZE/2+1)\n";
source<<"for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))\n"; source<<"for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))\n";
source<<"out[y*(ZSIZE*XSIZE)+z*XSIZE+x] = data"<<(stage%2)<<"[z]"<<outputSuffix<<";\n"; if (outputIsPacked)
source<<"out[y*(ZSIZE*(XSIZE/2+1))+z*(XSIZE/2+1)+x] = data"<<(stage%2)<<"[z]"<<outputSuffix<<";\n";
else
source<<"out[y*(ZSIZE*XSIZE)+z*XSIZE+x] = data"<<(stage%2)<<"[z]"<<outputSuffix<<";\n";
} }
else { else {
source<<"if (index < XSIZE*YSIZE)\n"; if (outputIsPacked) {
if (outputIsPacked) source<<"if (index < XSIZE*YSIZE && x < XSIZE/2+1)\n";
source<<"out[y*(ZSIZE*(XSIZE/2+1))+(get_local_id(0)%ZSIZE)*(XSIZE/2+1)+x] = data"<<(stage%2)<<"[get_local_id(0)]"<<outputSuffix<<";\n"; source<<"out[y*(ZSIZE*(XSIZE/2+1))+(get_local_id(0)%ZSIZE)*(XSIZE/2+1)+x] = data"<<(stage%2)<<"[get_local_id(0)]"<<outputSuffix<<";\n";
else }
else {
source<<"if (index < XSIZE*YSIZE)\n";
source<<"out[y*(ZSIZE*XSIZE)+(get_local_id(0)%ZSIZE)*XSIZE+x] = data"<<(stage%2)<<"[get_local_id(0)]"<<outputSuffix<<";\n"; source<<"out[y*(ZSIZE*XSIZE)+(get_local_id(0)%ZSIZE)*XSIZE+x] = data"<<(stage%2)<<"[get_local_id(0)]"<<outputSuffix<<";\n";
}
} }
map<string, string> replacements; map<string, string> replacements;
replacements["XSIZE"] = context.intToString(xsize); replacements["XSIZE"] = context.intToString(xsize);
......
...@@ -30,8 +30,7 @@ __kernel void execFFT(__global const INPUT_TYPE* restrict in, __global OUTPUT_TY ...@@ -30,8 +30,7 @@ __kernel void execFFT(__global const INPUT_TYPE* restrict in, __global OUTPUT_TY
int x = index/YSIZE; int x = index/YSIZE;
int y = index-x*YSIZE; int y = index-x*YSIZE;
#if OUTPUT_IS_PACKED #if OUTPUT_IS_PACKED
if (x >= XSIZE/2+1) if (x < XSIZE/2+1) {
continue;
#endif #endif
#if LOOP_REQUIRED #if LOOP_REQUIRED
for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0)) for (int z = get_local_id(0); z < ZSIZE; z += get_local_size(0))
...@@ -53,6 +52,9 @@ __kernel void execFFT(__global const INPUT_TYPE* restrict in, __global OUTPUT_TY ...@@ -53,6 +52,9 @@ __kernel void execFFT(__global const INPUT_TYPE* restrict in, __global OUTPUT_TY
#endif #endif
#endif #endif
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
#if OUTPUT_IS_PACKED
}
#endif
COMPUTE_FFT COMPUTE_FFT
} }
} }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment