Commit afae4bc8 authored by peastman
Browse files

Merge pull request #876 from peastman/fftopt

Optimization to OpenCL FFT
parents 861a6150 42089cac
......@@ -158,6 +158,8 @@ int OpenCLFFT3D::findLegalDimension(int minimum) {
cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threads, int axis, bool forward, bool inputIsReal) {
int maxThreads = std::min(256, (int) context.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
while (maxThreads > 128 && maxThreads-64 >= zsize)
maxThreads -= 64;
bool isCPU = context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU;
while (true) {
bool loopRequired = (zsize > maxThreads || isCPU);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment