Commit 73643858 authored by Peter Eastman's avatar Peter Eastman
Browse files

Removed unnecessary synchronization that hurt performance on Nvidia

parent a8a78957
......@@ -231,7 +231,6 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
source<<"if (index < XSIZE*YSIZE)\n";
source<<"out[y*(ZSIZE*XSIZE)+(get_local_id(0)%ZSIZE)*XSIZE+x] = data"<<(stage%2)<<"[get_local_id(0)];\n";
}
source<<"barrier(CLK_GLOBAL_MEM_FENCE);";
map<string, string> replacements;
replacements["XSIZE"] = context.intToString(xsize);
replacements["YSIZE"] = context.intToString(ysize);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment