Commit fcbee6a5 authored by Peter Eastman
Browse files

Fixed bug in PME with triclinic boxes

parent a4d327f5
......@@ -1944,6 +1944,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
sort->sort(*pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
......
......@@ -18,7 +18,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
}
extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
......@@ -28,9 +29,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
......
......@@ -1975,32 +1975,32 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]);
pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]);
pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]);
pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]);
pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
......
......@@ -83,7 +83,8 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER];
......@@ -93,9 +94,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS(pos)
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
......@@ -165,7 +164,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
}
#elif defined(DEVICE_IS_CPU)
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
if (firstx == lastx)
......@@ -179,9 +179,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = 0; i < NUM_ATOMS; i++) {
int atom = i;//pmeAtomGridIndex[i].x;
real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS(pos)
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment