Commit 295ae001 authored by Peter Eastman's avatar Peter Eastman
Browse files

OpenCL supports PME with triclinic boxes

parent 9bcba51e
...@@ -70,13 +70,6 @@ static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int ind ...@@ -70,13 +70,6 @@ static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int ind
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize()); kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
} }
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision())
kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
else
kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
}
static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) { static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxSizeDouble()); kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxSizeDouble());
...@@ -1786,7 +1779,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1786,7 +1779,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(5, pmeAtomGridIndex->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(7, pmeAtomGridIndex->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge"); pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid->getDeviceBuffer()); pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid->getDeviceBuffer());
...@@ -1815,44 +1808,105 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1815,44 +1808,105 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (pmeGrid != NULL && includeReciprocal) { if (pmeGrid != NULL && includeReciprocal) {
if (usePmeQueue) if (usePmeQueue)
cl.setQueue(pmeQueue); cl.setQueue(pmeQueue);
// Invert the periodic box vectors.
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
double scale = 1.0/determinant;
mm_double4 recipBoxVectors[3];
recipBoxVectors[0] = mm_double4(boxVectors[1][1]*boxVectors[2][2]*scale, 0, 0, 0);
recipBoxVectors[1] = mm_double4(-boxVectors[1][0]*boxVectors[2][2]*scale, boxVectors[0][0]*boxVectors[2][2]*scale, 0, 0);
recipBoxVectors[2] = mm_double4((boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0])*scale, -boxVectors[0][0]*boxVectors[2][1]*scale, boxVectors[0][0]*boxVectors[1][1]*scale, 0);
mm_float4 recipBoxVectorsFloat[3];
for (int i = 0; i < 3; i++)
recipBoxVectorsFloat[i] = mm_float4((float) recipBoxVectors[i].x, (float) recipBoxVectors[i].y, (float) recipBoxVectors[i].z, 0);
// Execute the reciprocal space kernels.
setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4); setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4);
setInvPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 5); if (cl.getUseDoublePrecision()) {
pmeUpdateBsplinesKernel.setArg<mm_double4>(5, recipBoxVectors[0]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(6, recipBoxVectors[1]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(7, recipBoxVectors[2]);
}
else {
pmeUpdateBsplinesKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms()); cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu) { if (deviceIsCpu) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setInvPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 6); if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
} }
else { else {
sort->sort(*pmeAtomGridIndex); sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setInvPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 6); if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms()); cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize()); cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
} }
else { else {
setPeriodicBoxSizeArg(cl, pmeAtomRangeKernel, 3);
setInvPeriodicBoxSizeArg(cl, pmeAtomRangeKernel, 4);
cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms()); cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeZIndexKernel, 2); setPeriodicBoxSizeArg(cl, pmeZIndexKernel, 2);
setInvPeriodicBoxSizeArg(cl, pmeZIndexKernel, 3); if (cl.getUseDoublePrecision())
pmeZIndexKernel.setArg<mm_double4>(3, recipBoxVectors[2]);
else
pmeZIndexKernel.setArg<mm_float4>(3, recipBoxVectorsFloat[2]);
cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms()); cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms()); cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
} }
} }
fft->execFFT(*pmeGrid, *pmeGrid2, true); fft->execFFT(*pmeGrid, *pmeGrid2, true);
setInvPeriodicBoxSizeArg(cl, pmeConvolutionKernel, 5);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble(); mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
double scaleFactor = 1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z); double scaleFactor = 1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z);
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision()) {
pmeConvolutionKernel.setArg<cl_double>(6, scaleFactor); pmeConvolutionKernel.setArg<mm_double4>(5, recipBoxVectors[0]);
else pmeConvolutionKernel.setArg<mm_double4>(6, recipBoxVectors[1]);
pmeConvolutionKernel.setArg<cl_float>(6, (float) scaleFactor); pmeConvolutionKernel.setArg<mm_double4>(7, recipBoxVectors[2]);
pmeConvolutionKernel.setArg<cl_double>(8, scaleFactor);
}
else {
pmeConvolutionKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]);
pmeConvolutionKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]);
pmeConvolutionKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
pmeConvolutionKernel.setArg<cl_float>(8, (float) scaleFactor);
}
cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms()); cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms());
fft->execFFT(*pmeGrid2, *pmeGrid, false); fft->execFFT(*pmeGrid2, *pmeGrid, false);
setPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 3); setPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 3);
setInvPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 4); if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
pmeInterpolateForceKernel.setArg<mm_double4>(5, recipBoxVectors[1]);
pmeInterpolateForceKernel.setArg<mm_double4>(6, recipBoxVectors[2]);
}
else {
pmeInterpolateForceKernel.setArg<mm_float4>(4, recipBoxVectorsFloat[0]);
pmeInterpolateForceKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[1]);
pmeInterpolateForceKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[2]);
}
if (deviceIsCpu) if (deviceIsCpu)
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else else
......
__kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache, __kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache,
__global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize) { __global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real4 scale = 1/(real) (PME_ORDER-1); const real4 scale = 1/(real) (PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER]; __local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
real4 pos = posq[i]; real4 pos = posq[i];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y; pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z; pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real4 t = (real4) ((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z, 0.0f); pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*GRID_SIZE_X;
t.y = (t.y-floor(t.y))*GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*GRID_SIZE_Z;
real4 dr = (real4) (t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0.0f); real4 dr = (real4) (t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0.0f);
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X, int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y, ((int) t.y) % GRID_SIZE_Y,
...@@ -41,7 +44,7 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4 ...@@ -41,7 +44,7 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
/** /**
* For each grid point, find the range of sorted atoms associated with that point. * For each grid point, find the range of sorted atoms associated with that point.
*/ */
__kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __global int* restrict pmeAtomRange, __global const real4* restrict posq, real4 periodicBoxSize, real4 invPeriodicBoxSize) { __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __global int* restrict pmeAtomRange, __global const real4* restrict posq) {
int start = (NUM_ATOMS*get_global_id(0))/get_global_size(0); int start = (NUM_ATOMS*get_global_id(0))/get_global_size(0);
int end = (NUM_ATOMS*(get_global_id(0)+1))/get_global_size(0); int end = (NUM_ATOMS*(get_global_id(0)+1))/get_global_size(0);
int last = (start == 0 ? -1 : pmeAtomGridIndex[start-1].y); int last = (start == 0 ? -1 : pmeAtomGridIndex[start-1].y);
...@@ -68,13 +71,13 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g ...@@ -68,13 +71,13 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
* The grid index won't be needed again. Reuse that component to hold the z index, thus saving * The grid index won't be needed again. Reuse that component to hold the z index, thus saving
* some work in the charge spreading kernel. * some work in the charge spreading kernel.
*/ */
__kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global const real4* restrict posq, real4 periodicBoxSize, real4 invPeriodicBoxSize) { __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global const real4* restrict posq, real4 periodicBoxSize, real4 recipBoxVecZ) {
int start = (NUM_ATOMS*get_global_id(0))/get_global_size(0); int start = (NUM_ATOMS*get_global_id(0))/get_global_size(0);
int end = (NUM_ATOMS*(get_global_id(0)+1))/get_global_size(0); int end = (NUM_ATOMS*(get_global_id(0)+1))/get_global_size(0);
for (int i = start; i < end; ++i) { for (int i = start; i < end; ++i) {
real posz = posq[pmeAtomGridIndex[i].x].z; real posz = posq[pmeAtomGridIndex[i].x].z;
posz -= floor(posz*invPeriodicBoxSize.z)*periodicBoxSize.z; posz -= floor(posz*recipBoxVecZ.z)*periodicBoxSize.z;
int z = ((int) ((posz*invPeriodicBoxSize.z)*GRID_SIZE_Z)) % GRID_SIZE_Z; int z = ((int) ((posz*recipBoxVecZ.z)*GRID_SIZE_Z)) % GRID_SIZE_Z;
pmeAtomGridIndex[i].y = z; pmeAtomGridIndex[i].y = z;
} }
} }
...@@ -83,7 +86,7 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co ...@@ -83,7 +86,7 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize) { __global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real4 scale = 1/(real) (PME_ORDER-1); const real4 scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
...@@ -93,12 +96,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -93,12 +96,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y; pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z; pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real4 t = (real4) ((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z, 0.0f); pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*GRID_SIZE_X;
t.y = (t.y-floor(t.y))*GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*GRID_SIZE_Z;
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X, int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y, ((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z, 0); ((int) t.z) % GRID_SIZE_Z, 0);
...@@ -163,7 +169,7 @@ __kernel void finishSpreadCharge(__global long* restrict pmeGrid) { ...@@ -163,7 +169,7 @@ __kernel void finishSpreadCharge(__global long* restrict pmeGrid) {
} }
#elif defined(DEVICE_IS_CPU) #elif defined(DEVICE_IS_CPU)
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real2* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize) { __global real2* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0); const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0); const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
if (firstx == lastx) if (firstx == lastx)
...@@ -177,12 +183,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -177,12 +183,15 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = 0; i < NUM_ATOMS; i++) { for (int i = 0; i < NUM_ATOMS; i++) {
int atom = i;//pmeAtomGridIndex[i].x; int atom = i;//pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y; pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z; pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real4 t = (real4) ((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z, 0.0f); pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*GRID_SIZE_X;
t.y = (t.y-floor(t.y))*GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*GRID_SIZE_Z;
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X, int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y, ((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z, 0); ((int) t.z) % GRID_SIZE_Z, 0);
...@@ -290,7 +299,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -290,7 +299,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
#endif #endif
__kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global real* restrict energyBuffer, __global const real* restrict pmeBsplineModuliX, __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global real* restrict energyBuffer, __global const real* restrict pmeBsplineModuliX,
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 invPeriodicBoxSize, real recipScaleFactor) { __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ, real recipScaleFactor) {
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
real energy = 0.0f; real energy = 0.0f;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) { for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
...@@ -303,9 +312,9 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r ...@@ -303,9 +312,9 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X); int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y); int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z); int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
real mhx = mx*invPeriodicBoxSize.x; real mhx = mx*recipBoxVecX.x;
real mhy = my*invPeriodicBoxSize.y; real mhy = mx*recipBoxVecY.x+my*recipBoxVecY.y;
real mhz = mz*invPeriodicBoxSize.z; real mhz = mx*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ.z;
real bx = pmeBsplineModuliX[kx]; real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky]; real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz]; real bz = pmeBsplineModuliZ[kz];
...@@ -320,7 +329,7 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r ...@@ -320,7 +329,7 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
} }
__kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real2* restrict pmeGrid, __kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real2* restrict pmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, __global int2* restrict pmeAtomGridIndex) { real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) {
const real4 scale = 1/(real) (PME_ORDER-1); const real4 scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
real4 ddata[PME_ORDER]; real4 ddata[PME_ORDER];
...@@ -332,12 +341,15 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global ...@@ -332,12 +341,15 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 force = 0.0f; real4 force = 0.0f;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y; pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z; pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real4 t = (real4) ((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
(pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
(pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z, 0.0f); pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*GRID_SIZE_X;
t.y = (t.y-floor(t.y))*GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*GRID_SIZE_Z;
int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X, int4 gridIndex = (int4) (((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y, ((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z, 0); ((int) t.z) % GRID_SIZE_Z, 0);
...@@ -385,9 +397,9 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global ...@@ -385,9 +397,9 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
} }
real4 totalForce = forceBuffers[atom]; real4 totalForce = forceBuffers[atom];
real q = pos.w*EPSILON_FACTOR; real q = pos.w*EPSILON_FACTOR;
totalForce.x -= q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x; totalForce.x -= q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
totalForce.y -= q*force.y*GRID_SIZE_Y*invPeriodicBoxSize.y; totalForce.y -= q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
totalForce.z -= q*force.z*GRID_SIZE_Z*invPeriodicBoxSize.z; totalForce.z -= q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
forceBuffers[atom] = totalForce; forceBuffers[atom] = totalForce;
} }
} }
......
...@@ -201,6 +201,57 @@ void testEwald2Ions() { ...@@ -201,6 +201,57 @@ void testEwald2Ions() {
ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 0.01/*10*TOL*/); ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 0.01/*10*TOL*/);
} }
void testTriclinic() {
// Create a triclinic box containing eight particles.
System system;
system.setDefaultPeriodicBoxVectors(Vec3(2.5, 0, 0), Vec3(0.5, 3.0, 0), Vec3(0.7, 0.9, 3.5));
for (int i = 0; i < 8; i++)
system.addParticle(1.0);
NonbondedForce* force = new NonbondedForce();
system.addForce(force);
force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(1.0);
force->setPMEParameters(3.45891, 32, 40, 48);
for (int i = 0; i < 4; i++)
force->addParticle(-1, 0.440104, 0.4184); // Cl parameters
for (int i = 0; i < 4; i++)
force->addParticle(1, 0.332840, 0.0115897); // Na parameters
vector<Vec3> positions(8);
positions[0] = Vec3(1.744, 2.788, 3.162);
positions[1] = Vec3(1.048, 0.762, 2.340);
positions[2] = Vec3(2.489, 1.570, 2.817);
positions[3] = Vec3(1.027, 1.893, 3.271);
positions[4] = Vec3(0.937, 0.825, 0.009);
positions[5] = Vec3(2.290, 1.887, 3.352);
positions[6] = Vec3(1.266, 1.111, 2.894);
positions[7] = Vec3(0.933, 1.862, 3.490);
// Compute the forces and energy.
VerletIntegrator integ(0.001);
Context context(system, integ, platform);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
// Compare them to values computed by Gromacs.
double expectedEnergy = -963.370;
vector<Vec3> expectedForce(8);
expectedForce[0] = Vec3(4.25253e+01, -1.23503e+02, 1.22139e+02);
expectedForce[1] = Vec3(9.74752e+01, 1.68213e+02, 1.93169e+02);
expectedForce[2] = Vec3(-1.50348e+02, 1.29165e+02, 3.70435e+02);
expectedForce[3] = Vec3(9.18644e+02, -3.52571e+00, -1.34772e+03);
expectedForce[4] = Vec3(-1.61193e+02, 9.01528e+01, -7.12904e+01);
expectedForce[5] = Vec3(2.82630e+02, 2.78029e+01, -3.72864e+02);
expectedForce[6] = Vec3(-1.47454e+02, -2.14448e+02, -3.55789e+02);
expectedForce[7] = Vec3(-8.82195e+02, -7.39132e+01, 1.46202e+03);
for (int i = 0; i < 8; i++) {
ASSERT_EQUAL_VEC(expectedForce[i], state.getForces()[i], 1e-4);
}
ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4);
}
void testErrorTolerance(NonbondedForce::NonbondedMethod method) { void testErrorTolerance(NonbondedForce::NonbondedMethod method) {
// Create a cloud of random point charges. // Create a cloud of random point charges.
...@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) { ...@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
testEwaldPME(false); testEwaldPME(false);
testEwaldPME(true); testEwaldPME(true);
// testEwald2Ions(); // testEwald2Ions();
testTriclinic();
testErrorTolerance(NonbondedForce::Ewald); testErrorTolerance(NonbondedForce::Ewald);
testErrorTolerance(NonbondedForce::PME); testErrorTolerance(NonbondedForce::PME);
testPMEParameters(); testPMEParameters();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment