Commit 61c50862 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed error on GPUs with only 16K local memory

parent 91f3379b
......@@ -1423,10 +1423,11 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX->getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY->getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ->getDeviceBuffer());
interpolateForceThreads = (cl.getDevice().getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() > 2*128*PmeOrder*sizeof(mm_float4) ? 128 : 64);
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
pmeInterpolateForceKernel.setArg(5, 2*128*PmeOrder*sizeof(mm_float4), NULL);
pmeInterpolateForceKernel.setArg(5, 2*interpolateForceThreads*PmeOrder*sizeof(mm_float4), NULL);
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid->getDeviceBuffer());
......@@ -1479,7 +1480,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
fft->execFFT(*pmeGrid2, *pmeGrid, false);
pmeInterpolateForceKernel.setArg<mm_float4>(3, boxSize);
pmeInterpolateForceKernel.setArg<mm_float4>(4, invBoxSize);
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms(), 128);
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms(), interpolateForceThreads);
}
double energy = ewaldSelfEnergy;
if (dispersionCoefficient != 0.0) {
......
......@@ -525,6 +525,7 @@ private:
cl::Kernel pmeInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines;
double ewaldSelfEnergy, dispersionCoefficient;
int interpolateForceThreads;
static const int PmeOrder = 5;
};
......
......@@ -220,7 +220,6 @@ __kernel void reciprocalConvolution(__global float2* pmeGrid, __global float* en
energyBuffer[get_global_id(0)] += 0.5f*energy;
}
__kernel __attribute__((reqd_work_group_size(128, 1, 1)))
__kernel void gridInterpolateForce(__global float4* posq, __global float4* forceBuffers, __global float2* pmeGrid, float4 periodicBoxSize, float4 invPeriodicBoxSize, __local float4* bsplinesCache) {
const float4 scale = 1.0f/(PME_ORDER-1);
__local float4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment