Commit bc961dd3 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed bugs in CPU version of PME

parent cb388aca
......@@ -936,6 +936,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete pmeBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeBsplineDTheta != NULL)
delete pmeBsplineDTheta;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
......@@ -1059,6 +1061,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeBsplineModuliY = new OpenCLArray<cl_float>(cl, gridSizeY, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray<cl_float>(cl, gridSizeZ, "pmeBsplineModuliZ");
pmeBsplineTheta = new OpenCLArray<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineTheta");
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (deviceIsCpu)
pmeBsplineDTheta = new OpenCLArray<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineTheta");
pmeAtomRange = new OpenCLArray<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = new OpenCLArray<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort<mm_int2>(cl, cl.getNumAtoms(), "int2", "value.y");
......@@ -1181,6 +1186,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta->getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*sizeof(mm_float4), NULL);
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex->getDeviceBuffer());
if (deviceIsCpu)
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(6, pmeBsplineDTheta->getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
......@@ -1198,7 +1205,12 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
pmeInterpolateForceKernel.setArg(5, 2*interpolateForceThreads*PmeOrder*sizeof(mm_float4), NULL);
if (deviceIsCpu) {
pmeInterpolateForceKernel.setArg<cl::Buffer>(5, pmeBsplineTheta->getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(6, pmeBsplineDTheta->getDeviceBuffer());
}
else
pmeInterpolateForceKernel.setArg(5, 2*interpolateForceThreads*PmeOrder*sizeof(mm_float4), NULL);
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid->getDeviceBuffer());
......
......@@ -460,8 +460,8 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL),
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeAtomRange(NULL),
pmeAtomGridIndex(NULL), sort(NULL), fft(NULL) {
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeBsplineDTheta(NULL),
pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), fft(NULL) {
}
~OpenCLCalcNonbondedForceKernel();
/**
......@@ -492,6 +492,7 @@ private:
OpenCLArray<cl_float>* pmeBsplineModuliY;
OpenCLArray<cl_float>* pmeBsplineModuliZ;
OpenCLArray<mm_float4>* pmeBsplineTheta;
OpenCLArray<mm_float4>* pmeBsplineDTheta;
OpenCLArray<cl_int>* pmeAtomRange;
OpenCLArray<mm_int2>* pmeAtomGridIndex;
OpenCLSort<mm_int2>* sort;
......
__kernel void updateBsplines(__global const float4* restrict posq, __global float4* restrict pmeBsplineTheta, __global float4* restrict pmeBsplineDTheta, __local float4* restrict bsplinesCache, __global int2* restrict pmeAtomGridIndex, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
__kernel void updateBsplines(__global const float4* restrict posq, __global float4* restrict pmeBsplineTheta, __local float4* restrict bsplinesCache, __global int2* restrict pmeAtomGridIndex, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* restrict pmeBsplineDTheta) {
const float4 scale = 1.0f/(PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local float4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
......@@ -112,7 +112,7 @@ __kernel void reciprocalConvolution(__global float2* restrict pmeGrid, __global
energyBuffer[get_global_id(0)] += 0.5f*energy;
}
__kernel void gridInterpolateForce(__global const float4* restrict posq, __global float4* restrict forceBuffers, __global const float4* restrict pmeBsplineTheta, __global const float4* restrict pmeBsplineDTheta, __global const float2* restrict pmeGrid, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
__kernel void gridInterpolateForce(__global const float4* restrict posq, __global float4* restrict forceBuffers, __global const float2* restrict pmeGrid, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global const float4* restrict pmeBsplineTheta, __global const float4* restrict pmeBsplineDTheta) {
for (int atom = get_global_id(0); atom < NUM_ATOMS; atom += get_global_size(0)) {
float4 force = 0.0f;
float4 pos = posq[atom];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment