Unverified Commit e0c80069 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Use VkFFT for OpenCL (#3934)

* Use VkFFT for OpenCL

* Updated comments for OpenCLFFT3D
parent a7800059
...@@ -88,7 +88,7 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT}) ...@@ -88,7 +88,7 @@ ENDIF(${CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT})
# The source is organized into subdirectories, but we handle them all from # The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS. # this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 libraries/pocketfft platforms/reference serialization libraries/irrxml) SET(OPENMM_SOURCE_SUBDIRS . openmmapi olla libraries/jama libraries/quern libraries/lepton libraries/sfmt libraries/lbfgs libraries/hilbert libraries/csha1 libraries/pocketfft libraries/vkfft platforms/reference serialization libraries/irrxml)
IF(X86 OR ARM) IF(X86 OR ARM)
SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/vecmath) SET(OPENMM_SOURCE_SUBDIRS ${OPENMM_SOURCE_SUBDIRS} libraries/vecmath)
ENDIF() ENDIF()
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2023 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -27,10 +27,31 @@ ...@@ -27,10 +27,31 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#define USE_VKFFT
#ifdef USE_VKFFT
#define VKFFT_BACKEND 3
#include "vkFFT.h"
#endif
#include "OpenCLArray.h" #include "OpenCLArray.h"
namespace OpenMM { namespace OpenMM {
#ifdef USE_VKFFT
/**
* This class performs three dimensional Fast Fourier Transforms. It uses the
* VkFFT library (https://github.com/DTolm/VkFFT).
* <p>
* This class is most efficient when the size of each dimension is a product of
* small prime factors: 2, 3, 5, 7, 11, and 13. You can call findLegalDimension()
* to determine the smallest size that satisfies this requirement and is greater
* than or equal to a specified minimum size.
* <p>
* Note that this class performs an unnormalized transform. That means that if you perform
* a forward transform followed immediately by an inverse transform, the effect is to
* multiply every value of the original data set by the total number of data points.
*/
#else
/** /**
* This class performs three dimensional Fast Fourier Transforms. It is based on the * This class performs three dimensional Fast Fourier Transforms. It is based on the
* mixed radix algorithm described in * mixed radix algorithm described in
...@@ -51,6 +72,7 @@ namespace OpenMM { ...@@ -51,6 +72,7 @@ namespace OpenMM {
* a forward transform followed immediately by an inverse transform, the effect is to * a forward transform followed immediately by an inverse transform, the effect is to
* multiply every value of the original data set by the total number of data points. * multiply every value of the original data set by the total number of data points.
*/ */
#endif
class OPENMM_EXPORT_COMMON OpenCLFFT3D { class OPENMM_EXPORT_COMMON OpenCLFFT3D {
public: public:
...@@ -64,6 +86,9 @@ public: ...@@ -64,6 +86,9 @@ public:
* @param realToComplex if true, a real-to-complex transform will be done. Otherwise, it is complex-to-complex. * @param realToComplex if true, a real-to-complex transform will be done. Otherwise, it is complex-to-complex.
*/ */
OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize, bool realToComplex=false); OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize, bool realToComplex=false);
#ifdef USE_VKFFT
~OpenCLFFT3D();
#endif
/** /**
* Perform a Fourier transform. The transform cannot be done in-place: the input and output * Perform a Fourier transform. The transform cannot be done in-place: the input and output
* arrays must be different. Also, the input array is used as workspace, so its contents * arrays must be different. Also, the input array is used as workspace, so its contents
...@@ -86,14 +111,18 @@ public: ...@@ -86,14 +111,18 @@ public:
*/ */
static int findLegalDimension(int minimum); static int findLegalDimension(int minimum);
private: private:
cl::Kernel createKernel(int xsize, int ysize, int zsize, int& threads, int axis, bool forward, bool inputIsReal);
int xsize, ysize, zsize; int xsize, ysize, zsize;
int xthreads, ythreads, zthreads; int xthreads, ythreads, zthreads;
bool packRealAsComplex; bool packRealAsComplex;
OpenCLContext& context; OpenCLContext& context;
#ifdef USE_VKFFT
VkFFTApplication app;
#else
cl::Kernel createKernel(int xsize, int ysize, int zsize, int& threads, int axis, bool forward, bool inputIsReal);
cl::Kernel xkernel, ykernel, zkernel; cl::Kernel xkernel, ykernel, zkernel;
cl::Kernel invxkernel, invykernel, invzkernel; cl::Kernel invxkernel, invykernel, invzkernel;
cl::Kernel packForwardKernel, unpackForwardKernel, packBackwardKernel, unpackBackwardKernel; cl::Kernel packForwardKernel, unpackForwardKernel, packBackwardKernel, unpackBackwardKernel;
#endif
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2023 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -37,6 +37,52 @@ ...@@ -37,6 +37,52 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
#ifdef USE_VKFFT
/**
 * Create an OpenCLFFT3D that performs its transforms with VkFFT.
 *
 * @param context        the context in which to perform calculations
 * @param xsize          the first dimension of the data sets on which FFTs will be performed
 * @param ysize          the second dimension of the data sets on which FFTs will be performed
 * @param zsize          the third dimension of the data sets on which FFTs will be performed
 * @param realToComplex  if true, a real-to-complex transform will be done.  Otherwise, it is complex-to-complex.
 * @throws OpenMMException if initializeVkFFT() reports anything other than VKFFT_SUCCESS
 */
OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize, bool realToComplex) :
        // Members are listed in declaration order (which is the order they are actually
        // initialized in), and the fields this VkFFT variant never touches are given
        // defined values instead of being left indeterminate.
        xsize(xsize), ysize(ysize), zsize(zsize), xthreads(0), ythreads(0), zthreads(0),
        packRealAsComplex(false), context(context), app() {
    VkFFTConfiguration config = {};
    config.FFTdim = 3;

    // VkFFT's axis 0 receives the z size and axis 2 receives the x size.
    config.size[0] = zsize;
    config.size[1] = ysize;
    config.size[2] = xsize;
    config.performR2C = realToComplex;
    config.doublePrecision = context.getUseDoublePrecision();

    // VkFFT is handed pointers to the OpenCL handles owned by the context.
    config.device = &context.getDevice()();
    config.context = &context.getContext()();

    // execFFT() relies on this flag: for the inverse direction it passes the
    // destination array as inputBuffer.
    config.inverseReturnToInputBuffer = true;

    // Describe the layout of the separate input buffer explicitly.
    // NOTE(review): presumably this lets the real input be read without padding --
    // confirm against the VkFFT documentation.
    config.isInputFormatted = 1;
    config.inputBufferStride[0] = zsize;
    config.inputBufferStride[1] = ysize*zsize;
    config.inputBufferStride[2] = xsize*ysize*zsize;

    VkFFTResult result = initializeVkFFT(&app, config);
    if (result != VKFFT_SUCCESS)
        throw OpenMMException("Error initializing VkFFT: "+context.intToString(result));
}
/**
 * Destructor: passes the VkFFT application object that the constructor set up
 * with initializeVkFFT() to deleteVkFFT().
 */
OpenCLFFT3D::~OpenCLFFT3D() {
deleteVkFFT(&app);
}
/**
 * Enqueue a Fourier transform of "in" into "out" using VkFFT.
 *
 * @param in       the array holding the data to transform
 * @param out      the array that receives the result
 * @param forward  true to perform a forward transform, false to perform an inverse transform
 * @throws OpenMMException if VkFFTAppend() reports anything other than VKFFT_SUCCESS
 */
void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
    // The constructor configures VkFFT with inverseReturnToInputBuffer, so the two
    // arrays swap slots for the inverse direction: "in" fills the inputBuffer slot
    // when going forward and the buffer slot when going backward.
    OpenCLArray& inputSlotArray = (forward ? in : out);
    OpenCLArray& bufferSlotArray = (forward ? out : in);

    VkFFTLaunchParams launch = {};
    launch.inputBuffer = &inputSlotArray.getDeviceBuffer()();
    launch.buffer = &bufferSlotArray.getDeviceBuffer()();
    launch.commandQueue = &context.getQueue()();

    // -1 is passed for a forward transform and 1 for an inverse one.
    const int direction = (forward ? -1 : 1);
    const VkFFTResult status = VkFFTAppend(&app, direction, &launch);
    if (status != VKFFT_SUCCESS)
        throw OpenMMException("Error executing VkFFT: "+context.intToString(status));
}
#else
OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize, bool realToComplex) : OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize, bool realToComplex) :
context(context), xsize(xsize), ysize(ysize), zsize(zsize) { context(context), xsize(xsize), ysize(ysize), zsize(zsize) {
packRealAsComplex = false; packRealAsComplex = false;
...@@ -142,23 +188,6 @@ void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) { ...@@ -142,23 +188,6 @@ void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
} }
} }
/**
 * Find the smallest value that is greater than or equal to "minimum" and can be
 * written as a product of factors no larger than 7.  Any minimum below 1 yields 1.
 */
int OpenCLFFT3D::findLegalDimension(int minimum) {
    if (minimum < 1)
        return 1;
    for (int candidate = minimum; ; ++candidate) {
        // Strip every factor from 2 through 7 out of the candidate; it is
        // acceptable exactly when nothing is left over.
        int leftover = candidate;
        for (int divisor = 2; divisor <= 7; ++divisor) {
            while (leftover > 1 && leftover%divisor == 0)
                leftover /= divisor;
        }
        if (leftover == 1)
            return candidate;
    }
}
cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threads, int axis, bool forward, bool inputIsReal) { cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threads, int axis, bool forward, bool inputIsReal) {
int maxThreads = min(256, (int) context.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>()); int maxThreads = min(256, (int) context.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
while (maxThreads > 128 && maxThreads-64 >= zsize) while (maxThreads > 128 && maxThreads-64 >= zsize)
...@@ -367,3 +396,27 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa ...@@ -367,3 +396,27 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize, int& threa
return kernel; return kernel;
} }
} }
#endif
/**
 * Find the smallest value that is greater than or equal to "minimum" and whose
 * factors are all small enough for the FFT implementation in use: at most 13
 * when USE_VKFFT is defined, and at most 7 otherwise.  Any minimum below 1 yields 1.
 */
int OpenCLFFT3D::findLegalDimension(int minimum) {
#ifdef USE_VKFFT
    const int largestFactor = 13;
#else
    const int largestFactor = 7;
#endif
    if (minimum < 1)
        return 1;
    for (int candidate = minimum; ; ++candidate) {
        // Divide out every allowed factor.  The candidate is legal exactly when
        // this reduces it all the way down to 1.
        int leftover = candidate;
        for (int divisor = 2; divisor <= largestFactor && leftover > 1; ++divisor) {
            while (leftover%divisor == 0)
                leftover /= divisor;
        }
        if (leftover == 1)
            return candidate;
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment