Commit e1181999 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed bug in PME with triclinic boxes

parent fcbee6a5
...@@ -1938,6 +1938,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1938,6 +1938,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels. // Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms()); cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
...@@ -1985,8 +1986,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1985,8 +1986,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false); fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
} }
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()}; cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
if (usePmeStream) { if (usePmeStream) {
cuEventRecord(pmeSyncEvent, pmeStream); cuEventRecord(pmeSyncEvent, pmeStream);
......
extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int2* __restrict__ pmeAtomGridIndex, extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Compute the index of the grid point each atom is associated with. // Compute the index of the grid point each atom is associated with.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real4 pos = posq[i]; real4 pos = posq[i];
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -196,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ ...@@ -196,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern "C" __global__ extern "C" __global__
void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid, void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
real3 ddata[PME_ORDER]; real3 ddata[PME_ORDER];
const real scale = RECIP(PME_ORDER-1); const real scale = RECIP(PME_ORDER-1);
...@@ -208,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __ ...@@ -208,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real3 force = make_real3(0); real3 force = make_real3(0);
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
......
...@@ -1913,7 +1913,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1913,7 +1913,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(7, pmeAtomGridIndex->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge"); pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer());
...@@ -1962,16 +1962,16 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1962,16 +1962,16 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
// Execute the reciprocal space kernels. // Execute the reciprocal space kernels.
setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4); setPeriodicBoxArgs(cl, pmeUpdateBsplinesKernel, 4);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeUpdateBsplinesKernel.setArg<mm_double4>(5, recipBoxVectors[0]); pmeUpdateBsplinesKernel.setArg<mm_double4>(9, recipBoxVectors[0]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(6, recipBoxVectors[1]); pmeUpdateBsplinesKernel.setArg<mm_double4>(10, recipBoxVectors[1]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(7, recipBoxVectors[2]); pmeUpdateBsplinesKernel.setArg<mm_double4>(11, recipBoxVectors[2]);
} }
else { else {
pmeUpdateBsplinesKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]); pmeUpdateBsplinesKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[0]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]); pmeUpdateBsplinesKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[1]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]); pmeUpdateBsplinesKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[2]);
} }
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms()); cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) { if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
...@@ -2038,16 +2038,16 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2038,16 +2038,16 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeEvalEnergyKernel, cl.getNumAtoms()); cl.executeKernel(pmeEvalEnergyKernel, cl.getNumAtoms());
cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms()); cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms());
fft->execFFT(*pmeGrid2, *pmeGrid, false); fft->execFFT(*pmeGrid2, *pmeGrid, false);
setPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 3); setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(4, recipBoxVectors[0]); pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
pmeInterpolateForceKernel.setArg<mm_double4>(5, recipBoxVectors[1]); pmeInterpolateForceKernel.setArg<mm_double4>(9, recipBoxVectors[1]);
pmeInterpolateForceKernel.setArg<mm_double4>(6, recipBoxVectors[2]); pmeInterpolateForceKernel.setArg<mm_double4>(10, recipBoxVectors[2]);
} }
else { else {
pmeInterpolateForceKernel.setArg<mm_float4>(4, recipBoxVectorsFloat[0]); pmeInterpolateForceKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[0]);
pmeInterpolateForceKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[1]); pmeInterpolateForceKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[1]);
pmeInterpolateForceKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[2]); pmeInterpolateForceKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[2]);
} }
if (deviceIsCpu) if (deviceIsCpu)
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
......
__kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache, __kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache,
__global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real4 scale = 1/(real) (PME_ORDER-1); const real4 scale = 1/(real) (PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER]; __local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
real4 pos = posq[i]; real4 pos = posq[i];
APPLY_PERIODIC_TO_POS(pos)
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -368,7 +370,8 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe ...@@ -368,7 +370,8 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
} }
__kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid, __kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid,
real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX,
real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) {
const real scale = 1/(real) (PME_ORDER-1); const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
real4 ddata[PME_ORDER]; real4 ddata[PME_ORDER];
...@@ -380,9 +383,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global ...@@ -380,9 +383,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 force = 0.0f; real4 force = 0.0f;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
......
...@@ -270,6 +270,49 @@ void testTriclinic() { ...@@ -270,6 +270,49 @@ void testTriclinic() {
ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4); ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4);
} }
void testTriclinic2() {
// Create a triclinic box containing a large molecule made up of randomly positioned particles and make sure the
// results match the reference platform.
if (platform.getName() == "Reference")
return;
const int numParticles = 1000;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(3.2, 0, 0), Vec3(-1.1, 3.1, 0), Vec3(-1.1, -1.5, 2.7));
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
NonbondedForce* force = new NonbondedForce();
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
force->addParticle(i%2 == 0 ? -1.0 : 1.0, 1.0, 0.0);
positions[i] = Vec3(10*genrand_real2(sfmt)-2, 10*genrand_real2(sfmt)-2, 10*genrand_real2(sfmt)-2);
if (i > 0) {
Vec3 delta = positions[i-1]-positions[i];
system.addConstraint(i-1, i, sqrt(delta.dot(delta)));
}
}
system.addForce(force);
force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(1.0);
force->setReciprocalSpaceForceGroup(1);
force->setPMEParameters(2.62826, 27, 25, 24);
// Compute the forces and energy.
VerletIntegrator integ1(0.001);
Context context1(system, integ1, platform);
context1.setPositions(positions);
VerletIntegrator integ2(0.001);
Context context2(system, integ2, Platform::getPlatformByName("Reference"));
context2.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy, false, 2);
State state2 = context2.getState(State::Forces | State::Energy, false, 2);
ASSERT_EQUAL_TOL(state2.getPotentialEnergy(), state1.getPotentialEnergy(), 1e-4);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state2.getForces()[i], state1.getForces()[i], 1e-4);
}
void testErrorTolerance(NonbondedForce::NonbondedMethod method) { void testErrorTolerance(NonbondedForce::NonbondedMethod method) {
// Create a cloud of random point charges. // Create a cloud of random point charges.
...@@ -391,6 +434,7 @@ int main(int argc, char* argv[]) { ...@@ -391,6 +434,7 @@ int main(int argc, char* argv[]) {
testEwaldPME(false); testEwaldPME(false);
testEwaldPME(true); testEwaldPME(true);
testTriclinic(); testTriclinic();
testTriclinic2();
testErrorTolerance(NonbondedForce::Ewald); testErrorTolerance(NonbondedForce::Ewald);
testErrorTolerance(NonbondedForce::PME); testErrorTolerance(NonbondedForce::PME);
testPMEParameters(); testPMEParameters();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment