/* -------------------------------------------------------------------------- * * OpenMM * * -------------------------------------------------------------------------- * * This is part of the OpenMM molecular simulation toolkit originating from * * Simbios, the NIH National Center for Physics-Based Simulation of * * Biological Structures at Stanford, funded under the NIH Roadmap for * * Medical Research, grant U54 GM072970. See https://simtk.org. * * * * Portions copyright (c) 2009-2012 Stanford University and the Authors. * * Authors: Peter Eastman * * Contributors: * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as published * * by the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU Lesser General Public License for more details. * * * * You should have received a copy of the GNU Lesser General Public License * * along with this program. If not, see . 
* * -------------------------------------------------------------------------- */

#include "OpenCLFFT3D.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLKernelSources.h"
#include "SimTKOpenMMRealType.h"
// NOTE(review): the three directives below carry no header name in this copy
// of the file; the names look like they were lost during text extraction.
// The code below uses stringstream, std::min and max, so the matching
// standard headers need to be restored -- confirm against the upstream file.
#include
#include
#include

using namespace OpenMM;
using namespace std;

/**
 * Store the context and grid dimensions, then build the three kernels used by
 * execFFT().
 *
 * Each kernel is built by createKernel() with the dimensions passed in a
 * different order; the dimension passed last is the one createKernel()
 * factors into radix passes.
 *
 * @param context  the context used to compile and launch the kernels
 * @param xsize    first grid dimension
 * @param ysize    second grid dimension
 * @param zsize    third grid dimension
 */
OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize) :
        context(context), xsize(xsize), ysize(ysize), zsize(zsize) {
    zkernel = createKernel(xsize, ysize, zsize, zthreads);
    xkernel = createKernel(ysize, zsize, xsize, xthreads);
    ykernel = createKernel(zsize, xsize, ysize, ythreads);
}

/**
 * Run the three kernels in sequence: zkernel, then xkernel, then ykernel.
 *
 * The buffers alternate between kernel arguments 0 and 1 on each launch:
 * in -> out, out -> in, in -> out.  The last launch has `out` as argument 1.
 * NOTE(review): `in` is bound to argument 1 of the second launch, so its
 * contents are presumably overwritten -- confirm against the fft kernel source.
 *
 * @param in       buffer bound to argument 0 of the first launch
 * @param out      buffer bound to argument 1 of the last launch
 * @param forward  selects the value of argument 2: 1 if true, -1 if false
 */
void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
    // First launch: in -> out.
    zkernel.setArg(0, in.getDeviceBuffer());
    zkernel.setArg(1, out.getDeviceBuffer());
    zkernel.setArg(2, forward ? 1 : -1);
    context.executeKernel(zkernel, xsize*ysize*zsize, zthreads);

    // Second launch: out -> in.
    xkernel.setArg(0, out.getDeviceBuffer());
    xkernel.setArg(1, in.getDeviceBuffer());
    xkernel.setArg(2, forward ? 1 : -1);
    context.executeKernel(xkernel, xsize*ysize*zsize, xthreads);

    // Third launch: in -> out.
    ykernel.setArg(0, in.getDeviceBuffer());
    ykernel.setArg(1, out.getDeviceBuffer());
    ykernel.setArg(2, forward ? 1 : -1);
    context.executeKernel(ykernel, xsize*ysize*zsize, ythreads);
}

/**
 * Find the smallest value that is >= minimum and can be reduced to 1 by
 * repeatedly dividing by the integers 2 through 7.
 *
 * @param minimum  the smallest acceptable value
 * @return 1 if minimum < 1; otherwise the first value at or above minimum
 *         that is fully divisible by the factors 2..7
 */
int OpenCLFFT3D::findLegalDimension(int minimum) {
    if (minimum < 1)
        return 1;
    while (true) {
        // Attempt to factor the current value.

        int unfactored = minimum;
        // Candidates 4 and 6 never divide anything here, because 2 and 3 have
        // already been divided out completely by the time they are reached.
        for (int factor = 2; factor < 8; factor++) {
            while (unfactored > 1 && unfactored%factor == 0)
                unfactored /= factor;
        }
        if (unfactored == 1)
            return minimum;
        minimum++;
    }
}

/**
 * Generate, compile and configure one "execFFT" kernel.
 *
 * The generated code is built by factoring zsize into radix passes, trying
 * 7, 5, 4, 3 and 2 in that order.  If the compiled kernel's work group limit
 * is smaller than the chosen thread count, the thread limit is lowered and
 * the kernel is generated again.
 *
 * @param xsize    substituted for XSIZE in the kernel source
 * @param ysize    substituted for YSIZE in the kernel source
 * @param zsize    substituted for ZSIZE, and the length that is factored
 * @param threads  on exit, 1 for a CPU device, otherwise blocksPerGroup*zsize
 * @return the compiled kernel with arguments 3, 4 and 5 already set
 * @throws OpenMMException if zsize has a factor other than 2, 3, 5 or 7
 */
cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threads) {
    // NOTE(review): both getInfo() calls below have lost their query parameter
    // in this copy of the file.  The first result is capped at 256 and used as
    // a thread limit; the second is compared with CL_DEVICE_TYPE_CPU.
    // Restore the parameters from the upstream file.
    int maxThreads = std::min(256, (int) context.getDevice().getInfo());
    bool isCPU = context.getDevice().getInfo() == CL_DEVICE_TYPE_CPU;
    while (true) {
        // A loop is generated when one work item per element is not possible
        // (zsize exceeds the limit) or when running on a CPU device.
        bool loopRequired = (zsize > maxThreads || isCPU);
        stringstream source;
        int blocksPerGroup = (loopRequired ? 1 : max(1, maxThreads/zsize));
        int stage = 0;
        int L = zsize;
        int m = 1;

        // Factor zsize, generating an appropriate block of code for each factor.

        while (L > 1) {
            // Stages alternate between index 0 and index 1.
            int input = stage%2;
            int output = 1-input;
            int radix;
            if (L%7 == 0)
                radix = 7;
            else if (L%5 == 0)
                radix = 5;
            else if (L%4 == 0)
                radix = 4;
            else if (L%3 == 0)
                radix = 3;
            else if (L%2 == 0)
                radix = 2;
            else
                throw OpenMMException("Illegal size for FFT: "+context.intToString(zsize));
            source<<"{\n";
            L = L/radix;
            // NOTE(review): the next statement is damaged.  Everything between
            // `" (radix "<` and `replacements;` is missing from this copy of
            // the file: the rest of the per-radix code generation, the closing
            // brace of this inner loop, and the declared type of
            // `replacements`.  It cannot be reconstructed from what is visible
            // here; restore it from the upstream file.
            source<<"// Pass "<<(stage+1)<<" (radix "< replacements;

        // Fill in the placeholders of the kernel template.
        replacements["XSIZE"] = context.intToString(xsize);
        replacements["YSIZE"] = context.intToString(ysize);
        replacements["ZSIZE"] = context.intToString(zsize);
        replacements["BLOCKS_PER_GROUP"] = context.intToString(blocksPerGroup);
        replacements["M_PI"] = context.doubleToString(M_PI);
        replacements["COMPUTE_FFT"] = source.str();
        replacements["LOOP_REQUIRED"] = (loopRequired ? "1" : "0");
        cl::Program program = context.createProgram(context.replaceStrings(OpenCLKernelSources::fft, replacements));
        cl::Kernel kernel(program, "execFFT");
        threads = (isCPU ? 1 : blocksPerGroup*zsize);
        // NOTE(review): the getWorkGroupInfo() query parameter also appears to
        // have been lost; the result is used as the kernel's thread limit.
        int kernelMaxThreads = kernel.getWorkGroupInfo(context.getDevice());
        if (threads > kernelMaxThreads) {
            // The device can't handle this block size, so reduce it and
            // regenerate the kernel with the smaller limit.

            maxThreads = kernelMaxThreads;
            continue;
        }
        // One complex element (double or single precision) per point in each
        // block handled by a work group.
        int bufferSize = blocksPerGroup*zsize*(context.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
        // Arguments 3-5 are given a size and a NULL pointer.
        // NOTE(review): in the OpenCL API this form is used to size __local
        // buffers -- confirm against the fft kernel source.
        kernel.setArg(3, bufferSize, NULL);
        kernel.setArg(4, bufferSize, NULL);
        kernel.setArg(5, bufferSize, NULL);
        return kernel;
    }
}