Commit 9f8910cd authored by peastman's avatar peastman
Browse files

Merge branch 'master' into gayberne

parents 0e683e8d 96a37423
...@@ -1938,12 +1938,14 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1938,12 +1938,14 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels. // Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms()); cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtomGridIndex); sort->sort(*pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(), void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()}; recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
...@@ -1984,8 +1986,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1984,8 +1986,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false); fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
} }
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()}; cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128); cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
if (usePmeStream) { if (usePmeStream) {
cuEventRecord(pmeSyncEvent, pmeStream); cuEventRecord(pmeSyncEvent, pmeStream);
......
extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int2* __restrict__ pmeAtomGridIndex, extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Compute the index of the grid point each atom is associated with. // Compute the index of the grid point each atom is associated with.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real4 pos = posq[i]; real4 pos = posq[i];
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -18,7 +20,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int ...@@ -18,7 +20,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
} }
extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid, extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1); const real scale = RECIP(PME_ORDER-1);
...@@ -28,9 +31,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real ...@@ -28,9 +31,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -197,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ ...@@ -197,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern "C" __global__ extern "C" __global__
void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid, void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
real3 ddata[PME_ORDER]; real3 ddata[PME_ORDER];
const real scale = RECIP(PME_ORDER-1); const real scale = RECIP(PME_ORDER-1);
...@@ -209,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __ ...@@ -209,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real3 force = make_real3(0); real3 force = make_real3(0);
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
......
...@@ -1913,7 +1913,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1913,7 +1913,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(7, pmeAtomGridIndex->getDeviceBuffer()); pmeInterpolateForceKernel.setArg<cl::Buffer>(11, pmeAtomGridIndex->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge"); pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer()); pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid2->getDeviceBuffer());
...@@ -1962,45 +1962,45 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1962,45 +1962,45 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
// Execute the reciprocal space kernels. // Execute the reciprocal space kernels.
setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4); setPeriodicBoxArgs(cl, pmeUpdateBsplinesKernel, 4);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeUpdateBsplinesKernel.setArg<mm_double4>(5, recipBoxVectors[0]); pmeUpdateBsplinesKernel.setArg<mm_double4>(9, recipBoxVectors[0]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(6, recipBoxVectors[1]); pmeUpdateBsplinesKernel.setArg<mm_double4>(10, recipBoxVectors[1]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(7, recipBoxVectors[2]); pmeUpdateBsplinesKernel.setArg<mm_double4>(11, recipBoxVectors[2]);
} }
else { else {
pmeUpdateBsplinesKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]); pmeUpdateBsplinesKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[0]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]); pmeUpdateBsplinesKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[1]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]); pmeUpdateBsplinesKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[2]);
} }
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms()); cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) { if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]); pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]); pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]); pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
} }
else { else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]); pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]); pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]); pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
} }
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
} }
else { else {
sort->sort(*pmeAtomGridIndex); sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]); pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]); pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]); pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
} }
else { else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]); pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]); pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]); pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
} }
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms()); cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize()); cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
...@@ -2038,16 +2038,16 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2038,16 +2038,16 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeEvalEnergyKernel, cl.getNumAtoms()); cl.executeKernel(pmeEvalEnergyKernel, cl.getNumAtoms());
cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms()); cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms());
fft->execFFT(*pmeGrid2, *pmeGrid, false); fft->execFFT(*pmeGrid2, *pmeGrid, false);
setPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 3); setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(4, recipBoxVectors[0]); pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
pmeInterpolateForceKernel.setArg<mm_double4>(5, recipBoxVectors[1]); pmeInterpolateForceKernel.setArg<mm_double4>(9, recipBoxVectors[1]);
pmeInterpolateForceKernel.setArg<mm_double4>(6, recipBoxVectors[2]); pmeInterpolateForceKernel.setArg<mm_double4>(10, recipBoxVectors[2]);
} }
else { else {
pmeInterpolateForceKernel.setArg<mm_float4>(4, recipBoxVectorsFloat[0]); pmeInterpolateForceKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[0]);
pmeInterpolateForceKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[1]); pmeInterpolateForceKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[1]);
pmeInterpolateForceKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[2]); pmeInterpolateForceKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[2]);
} }
if (deviceIsCpu) if (deviceIsCpu)
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1); cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
......
__kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache, __kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache,
__global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real4 scale = 1/(real) (PME_ORDER-1); const real4 scale = 1/(real) (PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER]; __local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
real4 pos = posq[i]; real4 pos = posq[i];
APPLY_PERIODIC_TO_POS(pos)
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -83,7 +85,8 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co ...@@ -83,7 +85,8 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real scale = 1/(real) (PME_ORDER-1); const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
...@@ -93,9 +96,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -93,9 +96,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -165,7 +166,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea ...@@ -165,7 +166,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
} }
#elif defined(DEVICE_IS_CPU) #elif defined(DEVICE_IS_CPU)
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0); const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0); const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
if (firstx == lastx) if (firstx == lastx)
...@@ -179,9 +181,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -179,9 +181,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = 0; i < NUM_ATOMS; i++) { for (int i = 0; i < NUM_ATOMS; i++) {
int atom = i;//pmeAtomGridIndex[i].x; int atom = i;//pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -370,7 +370,8 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe ...@@ -370,7 +370,8 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
} }
__kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid, __kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid,
real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX,
real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) {
const real scale = 1/(real) (PME_ORDER-1); const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
real4 ddata[PME_ORDER]; real4 ddata[PME_ORDER];
...@@ -382,9 +383,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global ...@@ -382,9 +383,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 force = 0.0f; real4 force = 0.0f;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
......
...@@ -270,6 +270,49 @@ void testTriclinic() { ...@@ -270,6 +270,49 @@ void testTriclinic() {
ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4); ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4);
} }
void testTriclinic2() {
// Create a triclinic box containing a large molecule made up of randomly positioned particles and make sure the
// results match the reference platform.
if (platform.getName() == "Reference")
return;
const int numParticles = 1000;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(3.2, 0, 0), Vec3(-1.1, 3.1, 0), Vec3(-1.1, -1.5, 2.7));
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
NonbondedForce* force = new NonbondedForce();
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
force->addParticle(i%2 == 0 ? -1.0 : 1.0, 1.0, 0.0);
positions[i] = Vec3(10*genrand_real2(sfmt)-2, 10*genrand_real2(sfmt)-2, 10*genrand_real2(sfmt)-2);
if (i > 0) {
Vec3 delta = positions[i-1]-positions[i];
system.addConstraint(i-1, i, sqrt(delta.dot(delta)));
}
}
system.addForce(force);
force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(1.0);
force->setReciprocalSpaceForceGroup(1);
force->setPMEParameters(2.62826, 27, 25, 24);
// Compute the forces and energy.
VerletIntegrator integ1(0.001);
Context context1(system, integ1, platform);
context1.setPositions(positions);
VerletIntegrator integ2(0.001);
Context context2(system, integ2, Platform::getPlatformByName("Reference"));
context2.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy, false, 2);
State state2 = context2.getState(State::Forces | State::Energy, false, 2);
ASSERT_EQUAL_TOL(state2.getPotentialEnergy(), state1.getPotentialEnergy(), 1e-4);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(state2.getForces()[i], state1.getForces()[i], 1e-4);
}
void testErrorTolerance(NonbondedForce::NonbondedMethod method) { void testErrorTolerance(NonbondedForce::NonbondedMethod method) {
// Create a cloud of random point charges. // Create a cloud of random point charges.
...@@ -391,6 +434,7 @@ int main(int argc, char* argv[]) { ...@@ -391,6 +434,7 @@ int main(int argc, char* argv[]) {
testEwaldPME(false); testEwaldPME(false);
testEwaldPME(true); testEwaldPME(true);
testTriclinic(); testTriclinic();
testTriclinic2();
testErrorTolerance(NonbondedForce::Ewald); testErrorTolerance(NonbondedForce::Ewald);
testErrorTolerance(NonbondedForce::PME); testErrorTolerance(NonbondedForce::PME);
testPMEParameters(); testPMEParameters();
......
...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of ...@@ -6,7 +6,7 @@ Simbios, the NIH National Center for Physics-Based Simulation of
Biological Structures at Stanford, funded under the NIH Roadmap for Biological Structures at Stanford, funded under the NIH Roadmap for
Medical Research, grant U54 GM072970. See https://simtk.org. Medical Research, grant U54 GM072970. See https://simtk.org.
Portions copyright (c) 2012-2015 Stanford University and the Authors. Portions copyright (c) 2012-2016 Stanford University and the Authors.
Authors: Peter Eastman Authors: Peter Eastman
Contributors: Jason Swails Contributors: Jason Swails
...@@ -814,7 +814,7 @@ class GromacsTopFile(object): ...@@ -814,7 +814,7 @@ class GromacsTopFile(object):
map = [] map = []
for i in range(mapSize): for i in range(mapSize):
for j in range(mapSize): for j in range(mapSize):
map.append(float(params[8+mapSize*((j+mapSize/2)%mapSize)+((i+mapSize/2)%mapSize)])) map.append(float(params[8+mapSize*((j+mapSize//2)%mapSize)+((i+mapSize//2)%mapSize)]))
map = tuple(map) map = tuple(map)
if map not in mapIndices: if map not in mapIndices:
mapIndices[map] = cmap.addMap(mapSize, map) mapIndices[map] = cmap.addMap(mapSize, map)
......
...@@ -335,7 +335,7 @@ def _mbondi3_radii(topology): ...@@ -335,7 +335,7 @@ def _mbondi3_radii(topology):
def _createEnergyTerms(force, solventDielectric, soluteDielectric, SA, cutoff, kappa, offset): def _createEnergyTerms(force, solventDielectric, soluteDielectric, SA, cutoff, kappa, offset):
# Add the energy terms to the CustomGBForce. These are identical for all the GB models. # Add the energy terms to the CustomGBForce. These are identical for all the GB models.
params = "; solventDielectric=%.16g; soluteDielectric=%.16g; kappa=%.16g; offset=%.16g" % (solventDielectric, soluteDielectric, kappa, offset) params = "; solventDielectric=%.16g; soluteDielectric=%.16g; kappa=%.16g; offset=%.16g" % (solventDielectric, soluteDielectric, kappa, offset)
if cutoff is not None: if cutoff is not None:
params += "; cutoff=%.16g" % cutoff params += "; cutoff=%.16g" % cutoff
...@@ -535,7 +535,7 @@ class GBSAGBnForce(CustomAmberGBForce): ...@@ -535,7 +535,7 @@ class GBSAGBnForce(CustomAmberGBForce):
def addParticle(self, parameters): def addParticle(self, parameters):
parameters = CustomAmberGBForce.addParticle(self, parameters) parameters = CustomAmberGBForce.addParticle(self, parameters)
if parameters[1] < 0.1 or parameters[1] > 0.2: if parameters[1] + self.OFFSET < 0.1 or parameters[1] + self.OFFSET > 0.2:
raise ValueError('Radii must be between 1 and 2 Angstroms for neck lookup') raise ValueError('Radii must be between 1 and 2 Angstroms for neck lookup')
def setParticleParameters(self, idx, parameters): def setParticleParameters(self, idx, parameters):
......
...@@ -361,5 +361,17 @@ class TestAmberPrmtopFile(unittest.TestCase): ...@@ -361,5 +361,17 @@ class TestAmberPrmtopFile(unittest.TestCase):
# Make sure it says something about chamber # Make sure it says something about chamber
self.assertTrue('chamber' in str(e).lower()) self.assertTrue('chamber' in str(e).lower())
def testGBneckRadii(self):
""" Tests that GBneck radii limits are correctly enforced """
from simtk.openmm.app.internal.customgbforces import GBSAGBnForce
f = GBSAGBnForce()
# Make sure legal parameters do not raise
f.addParticle([0, 0.1, 0.5])
f.addParticle([0, 0.2, 0.5])
f.addParticle([0, 0.15, 0.5])
# Now make sure that out-of-range parameters *do* raise
self.assertRaises(ValueError, lambda: f.addParticle([0, 0.9, 0.5]))
self.assertRaises(ValueError, lambda: f.addParticle([0, 0.21, 0.5]))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment