Commit 42089cac authored by peastman
Browse files

Optimization to OpenCL FFT

parent 861a6150
@@ -158,6 +158,8 @@ int OpenCLFFT3D::findLegalDimension(int minimum) {
 cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threads, int axis, bool forward, bool inputIsReal) {
 int maxThreads = std::min(256, (int) context.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
+while (maxThreads > 128 && maxThreads-64 >= zsize)
+maxThreads -= 64;
 bool isCPU = context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU;
 while (true) {
 bool loopRequired = (zsize > maxThreads || isCPU);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment