Commit fcbee6a5 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed bug in PME with triclinic boxes

parent a4d327f5
...@@ -1944,6 +1944,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1944,6 +1944,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
sort->sort(*pmeAtomGridIndex); sort->sort(*pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
......
...@@ -18,7 +18,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int ...@@ -18,7 +18,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
} }
extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid, extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1); const real scale = RECIP(PME_ORDER-1);
...@@ -28,9 +29,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real ...@@ -28,9 +29,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
......
...@@ -1975,32 +1975,32 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1975,32 +1975,32 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
} }
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms()); cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) { if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]); pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]); pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]); pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
} }
else { else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]); pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]); pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]); pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
} }
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
} }
else { else {
sort->sort(*pmeAtomGridIndex); sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]); pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]); pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]); pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
} }
else { else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]); pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]); pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]); pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
} }
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms()); cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize()); cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
......
...@@ -83,7 +83,8 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co ...@@ -83,7 +83,8 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real scale = 1/(real) (PME_ORDER-1); const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
...@@ -93,9 +94,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -93,9 +94,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -165,7 +164,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea ...@@ -165,7 +164,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
} }
#elif defined(DEVICE_IS_CPU) #elif defined(DEVICE_IS_CPU)
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0); const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0); const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
if (firstx == lastx) if (firstx == lastx)
...@@ -179,9 +179,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -179,9 +179,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = 0; i < NUM_ATOMS; i++) { for (int i = 0; i < NUM_ATOMS; i++) {
int atom = i;//pmeAtomGridIndex[i].x; int atom = i;//pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment