Commit 3db6b8ee authored by Jason Swails's avatar Jason Swails
Browse files

Merge branch 'master' into gbn-chk

parents 5d8e92ff 3a80b0f1
......@@ -68,8 +68,9 @@ typedef struct {
*/
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else
unsigned int numTiles,
#endif
......@@ -104,9 +105,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+j].x-posq1.x, localData[tbx+j].y-posq1.y, localData[tbx+j].z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -151,9 +150,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -291,12 +288,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
posq1.x -= floor((posq1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
posq1.y -= floor((posq1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
posq1.z -= floor((posq1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].x -= floor((localData[threadIdx.x].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].y -= floor((localData[threadIdx.x].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].z -= floor((localData[threadIdx.x].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x], blockCenterX)
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z);
......@@ -342,9 +335,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[tbx+tj];
......@@ -413,8 +404,9 @@ typedef struct {
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else
unsigned int numTiles,
#endif
......@@ -451,9 +443,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 posq2 = make_real4(localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -508,9 +498,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -656,12 +644,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
posq1.x -= floor((posq1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
posq1.y -= floor((posq1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
posq1.z -= floor((posq1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].x -= floor((localData[threadIdx.x].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].y -= floor((localData[threadIdx.x].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].z -= floor((localData[threadIdx.x].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x], blockCenterX)
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
......@@ -713,9 +697,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
......@@ -2,7 +2,8 @@
* Scale the particle positions with each axis independent
*/
extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4* __restrict__ posq,
extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize,
real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4* __restrict__ posq,
const int* __restrict__ moleculeAtoms, const int* __restrict__ moleculeStartIndex) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numMolecules; index += blockDim.x*gridDim.x) {
int first = moleculeStartIndex[index];
......@@ -25,13 +26,9 @@ extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scal
// Move it into the first periodic box.
int xcell = (int) floor(center.x*invPeriodicBoxSize.x);
int ycell = (int) floor(center.y*invPeriodicBoxSize.y);
int zcell = (int) floor(center.z*invPeriodicBoxSize.z);
real3 delta = make_real3(xcell*periodicBoxSize.x, ycell*periodicBoxSize.y, zcell*periodicBoxSize.z);
center.x -= delta.x;
center.y -= delta.y;
center.z -= delta.z;
real3 oldCenter = center;
APPLY_PERIODIC_TO_POS(center)
real3 delta = make_real3(oldCenter.x-center.x, oldCenter.y-center.y, oldCenter.z-center.z);
// Now scale the position of the molecule center.
......
......@@ -103,8 +103,9 @@ extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount,real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#endif
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
......@@ -155,9 +156,7 @@ extern "C" __global__ void computeNonbonded(
#endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
......@@ -223,9 +222,7 @@ extern "C" __global__ void computeNonbonded(
#endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
......@@ -412,17 +409,11 @@ extern "C" __global__ void computeNonbonded(
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
posq1.x -= floor((posq1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
posq1.y -= floor((posq1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
posq1.z -= floor((posq1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
#ifdef ENABLE_SHUFFLE
shflPosq.x -= floor((shflPosq.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
shflPosq.y -= floor((shflPosq.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
shflPosq.z -= floor((shflPosq.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(shflPosq, blockCenterX)
#else
localData[threadIdx.x].x -= floor((localData[threadIdx.x].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].y -= floor((localData[threadIdx.x].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].z -= floor((localData[threadIdx.x].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x], blockCenterX)
#endif
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
......@@ -499,9 +490,7 @@ extern "C" __global__ void computeNonbonded(
#endif
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
......
extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real4 invPeriodicBoxSize) {
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Compute the index of the grid point each atom is associated with.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real4 pos = posq[i];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
real3 t = make_real3((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X,
(pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y,
(pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z);
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*GRID_SIZE_X;
t.y = (t.y-floor(t.y))*GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*GRID_SIZE_Z;
int3 gridIndex = make_int3(((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z);
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z);
pmeAtomGridIndex[i] = make_int2(i, gridIndex.x*GRID_SIZE_Y*GRID_SIZE_Z+gridIndex.y*GRID_SIZE_Z+gridIndex.z);
}
}
extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, const int2* __restrict__ pmeAtomGridIndex) {
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
......@@ -27,15 +27,16 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int atom = pmeAtomGridIndex[i].x;
real charge = posq[atom].w;
real3 force = make_real3(0);
real4 pos = posq[atom];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
real3 t = make_real3((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X,
(pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y,
(pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z);
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*GRID_SIZE_X;
t.y = (t.y-floor(t.y))*GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*GRID_SIZE_Z;
int3 gridIndex = make_int3(((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z);
......@@ -78,7 +79,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = ybase + zindex;
real add = charge*dx*dy*data[iz].z;
real add = pos.w*dx*dy*data[iz].z;
#ifdef USE_DOUBLE_PRECISION
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000)));
......@@ -115,9 +116,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern "C" __global__ void
reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ energyBuffer,
const real* __restrict__ pmeBsplineModuliX,
const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real4 invPeriodicBoxSize) {
const real* __restrict__ pmeBsplineModuliX, const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
......@@ -131,9 +131,9 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
real mhx = mx*invPeriodicBoxSize.x;
real mhy = my*invPeriodicBoxSize.y;
real mhz = mz*invPeriodicBoxSize.z;
real mhx = mx*recipBoxVecX.x;
real mhy = mx*recipBoxVecY.x+my*recipBoxVecY.y;
real mhz = mx*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ.z;
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
......@@ -151,9 +151,8 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
extern "C" __global__ void
gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ energyBuffer,
const real* __restrict__ pmeBsplineModuliX,
const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real4 invPeriodicBoxSize) {
const real* __restrict__ pmeBsplineModuliX, const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
......@@ -168,9 +167,9 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
real mhx = mx*invPeriodicBoxSize.x;
real mhy = my*invPeriodicBoxSize.y;
real mhz = mz*invPeriodicBoxSize.z;
real mhx = mx*recipBoxVecX.x;
real mhy = mx*recipBoxVecY.x+my*recipBoxVecY.y;
real mhz = mx*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ.z;
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
......@@ -194,7 +193,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
extern "C" __global__
void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, const int2* __restrict__ pmeAtomGridIndex) {
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 data[PME_ORDER];
real3 ddata[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
......@@ -206,12 +205,15 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int atom = pmeAtomGridIndex[i].x;
real3 force = make_real3(0);
real4 pos = posq[atom];
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
real3 t = make_real3((pos.x*invPeriodicBoxSize.x)*GRID_SIZE_X,
(pos.y*invPeriodicBoxSize.y)*GRID_SIZE_Y,
(pos.z*invPeriodicBoxSize.z)*GRID_SIZE_Z);
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*GRID_SIZE_X;
t.y = (t.y-floor(t.y))*GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*GRID_SIZE_Z;
int3 gridIndex = make_int3(((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z);
......@@ -266,9 +268,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
real q = pos.w*EPSILON_FACTOR;
forceBuffers[atom] += static_cast<unsigned long long>((long long) (-q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x*0x100000000));
forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (-q*force.y*GRID_SIZE_Y*invPeriodicBoxSize.y*0x100000000));
forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (-q*force.z*GRID_SIZE_Z*invPeriodicBoxSize.z*0x100000000));
real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
forceBuffers[atom] += static_cast<unsigned long long>((long long) (forceX*0x100000000));
forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (forceY*0x100000000));
forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (forceZ*0x100000000));
}
}
......
......@@ -55,7 +55,17 @@ const double TOL = 1e-5;
CudaPlatform platform;
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize) {
Vec3 computeDelta(const Vec3& pos1, const Vec3& pos2, bool periodic, const Vec3* periodicBoxVectors) {
Vec3 diff = pos1-pos2;
if (periodic) {
diff -= periodicBoxVectors[2]*floor(diff[2]/periodicBoxVectors[2][2]+0.5);
diff -= periodicBoxVectors[1]*floor(diff[1]/periodicBoxVectors[1][1]+0.5);
diff -= periodicBoxVectors[0]*floor(diff[0]/periodicBoxVectors[0][0]+0.5);
}
return diff;
}
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize, bool triclinic) {
// Create a System and Context.
int numParticles = force->getNumParticles();
......@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
Vec3 boxVectors[3];
if (triclinic) {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0.2*boxSize, boxSize, 0);
boxVectors[2] = Vec3(-0.3*boxSize, -0.1*boxSize, boxSize);
}
else {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0, boxSize, 0);
boxVectors[2] = Vec3(0, 0, boxSize);
}
system.setDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
system.addForce(force);
VerletIntegrator integrator(0.001);
Context context(system, integrator, platform);
......@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
// See if the energy matches the expected value.
double expectedEnergy = 0;
bool periodic = (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic);
for (int i = 0; i < (int) expectedSets.size(); i++) {
int p1 = expectedSets[i][0];
int p2 = expectedSets[i][1];
int p3 = expectedSets[i][2];
Vec3 d12 = positions[p2]-positions[p1];
Vec3 d13 = positions[p3]-positions[p1];
Vec3 d23 = positions[p3]-positions[p2];
if (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic) {
for (int j = 0; j < 3; j++) {
d12[j] -= floor(d12[j]/boxSize+0.5f)*boxSize;
d13[j] -= floor(d13[j]/boxSize+0.5f)*boxSize;
d23[j] -= floor(d23[j]/boxSize+0.5f)*boxSize;
}
}
Vec3 d12 = computeDelta(positions[p2], positions[p1], periodic, boxVectors);
Vec3 d13 = computeDelta(positions[p3], positions[p1], periodic, boxVectors);
Vec3 d23 = computeDelta(positions[p3], positions[p2], periodic, boxVectors);
double r12 = sqrt(d12.dot(d12));
double r13 = sqrt(d13.dot(d13));
double r23 = sqrt(d23.dot(d23));
......@@ -210,7 +225,7 @@ void testNoCutoff() {
positions.push_back(Vec3(0.4, 0, -0.8));
int sets[4][3] = {{0,1,2}, {1,2,3}, {2,3,0}, {3,0,1}};
vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, 2.0);
validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
}
void testCutoff() {
......@@ -235,7 +250,7 @@ void testCutoff() {
positions.push_back(Vec3(0.2, 0.5, -0.1));
int sets[7][3] = {{0,1,2}, {0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,2,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[7]);
validateAxilrodTeller(force, positions, expectedSets, 2.0);
validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
}
void testPeriodic() {
......@@ -261,7 +276,33 @@ void testPeriodic() {
double boxSize = 2.1;
int sets[5][3] = {{0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, boxSize);
validateAxilrodTeller(force, positions, expectedSets, boxSize, false);
}
void testTriclinic() {
CustomManyParticleForce* force = new CustomManyParticleForce(3,
"C*(1+3*cos(theta1)*cos(theta2)*cos(theta3))/(r12*r13*r23)^3;"
"theta1=angle(p1,p2,p3); theta2=angle(p2,p3,p1); theta3=angle(p3,p1,p2);"
"r12=distance(p1,p2); r13=distance(p1,p3); r23=distance(p2,p3)");
force->addGlobalParameter("C", 1.5);
force->setNonbondedMethod(CustomManyParticleForce::CutoffPeriodic);
force->setCutoffDistance(1.05);
vector<double> params;
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
vector<Vec3> positions;
positions.push_back(Vec3(0, 0, 0));
positions.push_back(Vec3(1, 0, 0));
positions.push_back(Vec3(0, 1.1, 0.3));
positions.push_back(Vec3(0.4, 0, -0.8));
positions.push_back(Vec3(0.2, 0.5, -0.1));
double boxSize = 2.1;
int sets[4][3] = {{0,1,3}, {0,1,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, boxSize, true);
}
void testExclusions() {
......@@ -286,7 +327,7 @@ void testExclusions() {
force->addExclusion(0, 3);
int sets[5][3] = {{0,1,4}, {1,2,3}, {1,2,4}, {1,3,4}, {2,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, 2.0);
validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
}
void testAllTerms() {
......@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
testNoCutoff();
testCutoff();
testPeriodic();
testTriclinic();
testExclusions();
testAllTerms();
testParameters();
......
......@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -261,6 +261,65 @@ void testPeriodic() {
ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL);
}
void testTriclinic() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
Vec3 a(3.1, 0, 0);
Vec3 b(0.4, 3.5, 0);
Vec3 c(-0.1, -0.5, 4.0);
system.setDefaultPeriodicBoxVectors(a, b, c);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("r");
nonbonded->addParticle(vector<double>());
nonbonded->addParticle(vector<double>());
nonbonded->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
const double cutoff = 1.5;
nonbonded->setCutoffDistance(cutoff);
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int iteration = 0; iteration < 50; iteration++) {
// Generate random positions for the two particles.
positions[0] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
positions[1] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
context.setPositions(positions);
// Loop over all possible periodic copies and find the nearest one.
Vec3 delta;
double distance2 = 100.0;
for (int i = -1; i < 2; i++)
for (int j = -1; j < 2; j++)
for (int k = -1; k < 2; k++) {
Vec3 d = positions[1]-positions[0]+a*i+b*j+c*k;
if (d.dot(d) < distance2) {
delta = d;
distance2 = d.dot(d);
}
}
double distance = sqrt(distance2);
// See if the force and energy are correct.
State state = context.getState(State::Forces | State::Energy);
if (distance >= cutoff) {
ASSERT_EQUAL(0.0, state.getPotentialEnergy());
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[0], 0);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[1], 0);
}
else {
const Vec3 force = delta/sqrt(delta.dot(delta));
ASSERT_EQUAL_TOL(distance, state.getPotentialEnergy(), TOL);
ASSERT_EQUAL_VEC(force, state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(-force, state.getForces()[1], TOL);
}
}
}
void testContinuous1DFunction() {
System system;
system.addParticle(1.0);
......@@ -925,6 +984,7 @@ int main(int argc, char* argv[]) {
testExclusions();
testCutoff();
testPeriodic();
testTriclinic();
testContinuous1DFunction();
testContinuous2DFunction();
testContinuous3DFunction();
......
......@@ -201,6 +201,57 @@ void testEwald2Ions() {
ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 0.01/*10*TOL*/);
}
void testTriclinic() {
// Create a triclinic box containing eight particles.
System system;
system.setDefaultPeriodicBoxVectors(Vec3(2.5, 0, 0), Vec3(0.5, 3.0, 0), Vec3(0.7, 0.9, 3.5));
for (int i = 0; i < 8; i++)
system.addParticle(1.0);
NonbondedForce* force = new NonbondedForce();
system.addForce(force);
force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(1.0);
force->setPMEParameters(3.45891, 32, 40, 48);
for (int i = 0; i < 4; i++)
force->addParticle(-1, 0.440104, 0.4184); // Cl parameters
for (int i = 0; i < 4; i++)
force->addParticle(1, 0.332840, 0.0115897); // Na parameters
vector<Vec3> positions(8);
positions[0] = Vec3(1.744, 2.788, 3.162);
positions[1] = Vec3(1.048, 0.762, 2.340);
positions[2] = Vec3(2.489, 1.570, 2.817);
positions[3] = Vec3(1.027, 1.893, 3.271);
positions[4] = Vec3(0.937, 0.825, 0.009);
positions[5] = Vec3(2.290, 1.887, 3.352);
positions[6] = Vec3(1.266, 1.111, 2.894);
positions[7] = Vec3(0.933, 1.862, 3.490);
// Compute the forces and energy.
VerletIntegrator integ(0.001);
Context context(system, integ, platform);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
// Compare them to values computed by Gromacs.
double expectedEnergy = -963.370;
vector<Vec3> expectedForce(8);
expectedForce[0] = Vec3(4.25253e+01, -1.23503e+02, 1.22139e+02);
expectedForce[1] = Vec3(9.74752e+01, 1.68213e+02, 1.93169e+02);
expectedForce[2] = Vec3(-1.50348e+02, 1.29165e+02, 3.70435e+02);
expectedForce[3] = Vec3(9.18644e+02, -3.52571e+00, -1.34772e+03);
expectedForce[4] = Vec3(-1.61193e+02, 9.01528e+01, -7.12904e+01);
expectedForce[5] = Vec3(2.82630e+02, 2.78029e+01, -3.72864e+02);
expectedForce[6] = Vec3(-1.47454e+02, -2.14448e+02, -3.55789e+02);
expectedForce[7] = Vec3(-8.82195e+02, -7.39132e+01, 1.46202e+03);
for (int i = 0; i < 8; i++) {
ASSERT_EQUAL_VEC(expectedForce[i], state.getForces()[i], 1e-4);
}
ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4);
}
void testErrorTolerance(NonbondedForce::NonbondedMethod method) {
// Create a cloud of random point charges.
......@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
testEwaldPME(false);
testEwaldPME(true);
// testEwald2Ions();
testTriclinic();
testErrorTolerance(NonbondedForce::Ewald);
testErrorTolerance(NonbondedForce::PME);
testPMEParameters();
......
......@@ -236,6 +236,82 @@ void testRandomSeed() {
}
}
void testTriclinic() {
const int numParticles = 64;
const int frequency = 10;
const int steps = 1000;
const double pressure = 1.5;
const double pressureInMD = pressure*(AVOGADRO*1e-25); // pressure in kJ/mol/nm^3
const double temperature = 300.0;
const double initialVolume = numParticles*BOLTZ*temperature/pressureInMD;
const double initialLength = std::pow(initialVolume, 1.0/3.0);
// Create a gas of noninteracting particles.
System system;
Vec3 initialBox[3];
initialBox[0] = Vec3(initialLength, 0, 0);
initialBox[1] = Vec3(0.2*initialLength, initialLength, 0);
initialBox[2] = Vec3(0.1*initialLength, 0.3*initialLength, initialLength);
system.setDefaultPeriodicBoxVectors(initialBox[0], initialBox[1], initialBox[2]);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
system.addParticle(1.0);
positions[i] = Vec3(initialLength*genrand_real2(sfmt), initialLength*genrand_real2(sfmt), initialLength*genrand_real2(sfmt));
}
MonteCarloAnisotropicBarostat* barostat = new MonteCarloAnisotropicBarostat(Vec3(pressure, pressure, pressure), temperature, true, true, true, frequency);
system.addForce(barostat);
// Run a simulation
LangevinIntegrator integrator(temperature, 0.1, 0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the volume is correct.
double volume = 0.0;
for (int j = 0; j < steps; ++j) {
Vec3 box[3];
context.getState(0).getPeriodicBoxVectors(box[0], box[1], box[2]);
volume += box[0][0]*box[1][1]*box[2][2];
integrator.step(frequency);
}
volume /= steps;
double expected = (numParticles+1)*BOLTZ*temperature/pressureInMD;
ASSERT_USUALLY_EQUAL_TOL(expected, volume, 3/std::sqrt((double) steps));
// Make sure the box vectors have been scaled consistently.
State state = context.getState(State::Positions);
Vec3 box[3];
state.getPeriodicBoxVectors(box[0], box[1], box[2]);
double xscale = box[2][0]/(0.1*initialLength);
double yscale = box[2][1]/(0.3*initialLength);
double zscale = box[2][2]/(1.0*initialLength);
for (int i = 0; i < 3; i++) {
ASSERT_EQUAL_VEC(Vec3(xscale*initialBox[i][0], yscale*initialBox[i][1], zscale*initialBox[i][2]), box[i], 1e-5);
}
// The barostat should have put all particles inside the first periodic box. One integration step
// has happened since then, so they may have moved slightly outside it.
for (int i = 0; i < numParticles; i++) {
Vec3 pos = state.getPositions()[i];
ASSERT(pos[2]/box[2][2] > -1 && pos[2]/box[2][2] < 2);
pos -= box[2]*floor(pos[2]/box[2][2]);
ASSERT(pos[1]/box[1][1] > -1 && pos[1]/box[1][1] < 2);
pos -= box[1]*floor(pos[1]/box[1][1]);
ASSERT(pos[0]/box[0][0] > -1 && pos[0]/box[0][0] < 2);
}
}
/**
* Run a constant pressure simulation on an anisotropic Einstein crystal
* using isotropic and anisotropic barostats. There are a total of 15 simulations:
......@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
testIdealGasAxis(1);
testIdealGasAxis(2);
testRandomSeed();
testTriclinic();
//testEinsteinCrystal();
}
catch(const exception& e) {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -355,6 +355,67 @@ void testPeriodic() {
ASSERT_EQUAL_TOL(2*ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL);
}
void testTriclinic() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
Vec3 a(3.1, 0, 0);
Vec3 b(0.4, 3.5, 0);
Vec3 c(-0.1, -0.5, 4.0);
system.setDefaultPeriodicBoxVectors(a, b, c);
VerletIntegrator integrator(0.01);
NonbondedForce* nonbonded = new NonbondedForce();
nonbonded->addParticle(1.0, 1, 0);
nonbonded->addParticle(1.0, 1, 0);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
const double cutoff = 1.5;
nonbonded->setCutoffDistance(cutoff);
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
const double eps = 78.3;
const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
for (int iteration = 0; iteration < 50; iteration++) {
// Generate random positions for the two particles.
positions[0] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
positions[1] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
context.setPositions(positions);
// Loop over all possible periodic copies and find the nearest one.
Vec3 delta;
double distance2 = 100.0;
for (int i = -1; i < 2; i++)
for (int j = -1; j < 2; j++)
for (int k = -1; k < 2; k++) {
Vec3 d = positions[1]-positions[0]+a*i+b*j+c*k;
if (d.dot(d) < distance2) {
delta = d;
distance2 = d.dot(d);
}
}
double distance = sqrt(distance2);
// See if the force and energy are correct.
State state = context.getState(State::Forces | State::Energy);
if (distance >= cutoff) {
ASSERT_EQUAL(0.0, state.getPotentialEnergy());
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[0], 0);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[1], 0);
}
else {
const Vec3 force = delta*ONE_4PI_EPS0*(-1.0/(distance*distance*distance)+2.0*krf);
ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(1.0/distance+krf*distance*distance-crf), state.getPotentialEnergy(), TOL);
ASSERT_EQUAL_VEC(force, state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(-force, state.getForces()[1], TOL);
}
}
}
void testLargeSystem() {
const int numMolecules = 600;
......@@ -862,6 +923,33 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
}
}
void testReordering() {
// Check that reordering of atoms doesn't alter their positions.
const int numParticles = 200;
System system;
system.setDefaultPeriodicBoxVectors(Vec3(6, 0, 0), Vec3(2.1, 6, 0), Vec3(-1.5, -0.5, 6));
NonbondedForce *nonbonded = new NonbondedForce();
nonbonded->setNonbondedMethod(NonbondedForce::PME);
system.addForce(nonbonded);
vector<Vec3> positions;
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(0.0, 0.0, 0.0);
positions.push_back(Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5)*20);
}
VerletIntegrator integrator(0.001);
Context context(system, integrator, platform);
context.setPositions(positions);
integrator.step(1);
State state = context.getState(State::Positions | State::Velocities);
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(positions[i], state.getPositions()[i], 1e-6);
}
}
int main(int argc, char* argv[]) {
try {
if (argc > 1)
......@@ -872,6 +960,7 @@ int main(int argc, char* argv[]) {
testCutoff();
testCutoff14();
testPeriodic();
testTriclinic();
testLargeSystem();
//testBlockInteractions(false);
//testBlockInteractions(true);
......@@ -882,6 +971,7 @@ int main(int argc, char* argv[]) {
testParallelComputation(NonbondedForce::PME);
testSwitchingFunction(NonbondedForce::CutoffNonPeriodic);
testSwitchingFunction(NonbondedForce::PME);
testReordering();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -448,36 +448,65 @@ public:
/**
* Get whether the device being used supports 64 bit atomic operations on global memory.
*/
bool getSupports64BitGlobalAtomics() {
bool getSupports64BitGlobalAtomics() const {
return supports64BitGlobalAtomics;
}
/**
* Get whether the device being used supports double precision math.
*/
bool getSupportsDoublePrecision() {
bool getSupportsDoublePrecision() const {
return supportsDoublePrecision;
}
/**
* Get whether double precision is being used.
*/
bool getUseDoublePrecision() {
bool getUseDoublePrecision() const {
return useDoublePrecision;
}
/**
* Get whether mixed precision is being used.
*/
bool getUseMixedPrecision() {
bool getUseMixedPrecision() const {
return useMixedPrecision;
}
/**
* Get whether the periodic box is triclinic.
*/
bool getBoxIsTriclinic() const {
return boxIsTriclinic;
}
/**
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
*/
std::string doubleToString(double value);
std::string doubleToString(double value) const;
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
std::string intToString(int value);
std::string intToString(int value) const;
/**
* Get the vectors defining the periodic box.
*/
void getPeriodicBoxVectors(Vec3& a, Vec3& b, Vec3& c) const {
a = Vec3(periodicBoxVecXDouble.x, periodicBoxVecXDouble.y, periodicBoxVecXDouble.z);
b = Vec3(periodicBoxVecYDouble.x, periodicBoxVecYDouble.y, periodicBoxVecYDouble.z);
c = Vec3(periodicBoxVecZDouble.x, periodicBoxVecZDouble.y, periodicBoxVecZDouble.z);
}
/**
* Set the vectors defining the periodic box.
*/
void setPeriodicBoxVectors(const Vec3& a, const Vec3& b, const Vec3& c) {
periodicBoxVecX = mm_float4((float) a[0], (float) a[1], (float) a[2], 0.0f);
periodicBoxVecY = mm_float4((float) b[0], (float) b[1], (float) b[2], 0.0f);
periodicBoxVecZ = mm_float4((float) c[0], (float) c[1], (float) c[2], 0.0f);
periodicBoxVecXDouble = mm_double4(a[0], a[1], a[2], 0.0);
periodicBoxVecYDouble = mm_double4(b[0], b[1], b[2], 0.0);
periodicBoxVecZDouble = mm_double4(c[0], c[1], c[2], 0.0);
periodicBoxSize = mm_float4((float) a[0], (float) b[1], (float) c[2], 0.0f);
invPeriodicBoxSize = mm_float4(1.0f/(float) a[0], 1.0f/(float) b[1], 1.0f/(float) c[2], 0.0f);
periodicBoxSizeDouble = mm_double4(a[0], b[1], c[2], 0.0);
invPeriodicBoxSizeDouble = mm_double4(1.0/a[0], 1.0/b[1], 1.0/c[2], 0.0);
}
/**
* Get the size of the periodic box.
*/
......@@ -490,15 +519,6 @@ public:
mm_double4 getPeriodicBoxSizeDouble() const {
return periodicBoxSizeDouble;
}
/**
* Set the size of the periodic box.
*/
void setPeriodicBoxSize(double xsize, double ysize, double zsize) {
periodicBoxSize = mm_float4((float) xsize, (float) ysize, (float) zsize, 0);
invPeriodicBoxSize = mm_float4((float) (1.0/xsize), (float) (1.0/ysize), (float) (1.0/zsize), 0);
periodicBoxSizeDouble = mm_double4(xsize, ysize, zsize, 0);
invPeriodicBoxSizeDouble = mm_double4(1.0/xsize, 1.0/ysize, 1.0/zsize, 0);
}
/**
* Get the inverse of the size of the periodic box.
*/
......@@ -511,6 +531,42 @@ public:
mm_double4 getInvPeriodicBoxSizeDouble() const {
return invPeriodicBoxSizeDouble;
}
/**
* Get the first periodic box vector.
*/
mm_float4 getPeriodicBoxVecX() {
return periodicBoxVecX;
}
/**
* Get the first periodic box vector.
*/
mm_double4 getPeriodicBoxVecXDouble() {
return periodicBoxVecXDouble;
}
/**
* Get the second periodic box vector.
*/
mm_float4 getPeriodicBoxVecY() {
return periodicBoxVecY;
}
/**
* Get the second periodic box vector.
*/
mm_double4 getPeriodicBoxVecYDouble() {
return periodicBoxVecYDouble;
}
/**
* Get the third periodic box vector.
*/
mm_float4 getPeriodicBoxVecZ() {
return periodicBoxVecZ;
}
/**
* Get the third periodic box vector.
*/
mm_double4 getPeriodicBoxVecZDouble() {
return periodicBoxVecZDouble;
}
/**
* Get the OpenCLIntegrationUtilities for this context.
*/
......@@ -628,9 +684,9 @@ private:
int numThreadBlocks;
int numForceBuffers;
int simdWidth;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered;
mm_float4 periodicBoxSize, invPeriodicBoxSize;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble;
bool supports64BitGlobalAtomics, supportsDoublePrecision, useDoublePrecision, useMixedPrecision, atomsWereReordered, boxIsTriclinic;
mm_float4 periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ;
mm_double4 periodicBoxSizeDouble, invPeriodicBoxSizeDouble, periodicBoxVecXDouble, periodicBoxVecYDouble, periodicBoxVecZDouble;
std::string defaultOptimizationOptions;
std::map<std::string, std::string> compilationDefines;
cl::Context context;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["LOG"] = "log";
}
// Set defines for applying periodic boundary conditions.
Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
boxIsTriclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
if (boxIsTriclinic) {
compilationDefines["APPLY_PERIODIC_TO_DELTA(delta)"] =
"{"
"real scale3 = floor(delta.z*invPeriodicBoxSize.z+0.5f); \\\n"
"delta.xyz -= scale3*periodicBoxVecZ.xyz; \\\n"
"real scale2 = floor(delta.y*invPeriodicBoxSize.y+0.5f); \\\n"
"delta.xy -= scale2*periodicBoxVecY.xy; \\\n"
"real scale1 = floor(delta.x*invPeriodicBoxSize.x+0.5f); \\\n"
"delta.x -= scale1*periodicBoxVecX.x;}";
compilationDefines["APPLY_PERIODIC_TO_POS(pos)"] =
"{"
"real scale3 = floor(pos.z*invPeriodicBoxSize.z); \\\n"
"pos.xyz -= scale3*periodicBoxVecZ.xyz; \\\n"
"real scale2 = floor(pos.y*invPeriodicBoxSize.y); \\\n"
"pos.xy -= scale2*periodicBoxVecY.xy; \\\n"
"real scale1 = floor(pos.x*invPeriodicBoxSize.x); \\\n"
"pos.x -= scale1*periodicBoxVecX.x;}";
compilationDefines["APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"] =
"{"
"real scale3 = floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f); \\\n"
"pos.x -= scale3*periodicBoxVecZ.x; \\\n"
"pos.y -= scale3*periodicBoxVecZ.y; \\\n"
"pos.z -= scale3*periodicBoxVecZ.z; \\\n"
"real scale2 = floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f); \\\n"
"pos.x -= scale2*periodicBoxVecY.x; \\\n"
"pos.y -= scale2*periodicBoxVecY.y; \\\n"
"real scale1 = floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f); \\\n"
"pos.x -= scale1*periodicBoxVecX.x;}";
}
else {
compilationDefines["APPLY_PERIODIC_TO_DELTA(delta)"] =
"delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;";
compilationDefines["APPLY_PERIODIC_TO_POS(pos)"] =
"pos.xyz -= floor(pos.xyz*invPeriodicBoxSize.xyz)*periodicBoxSize.xyz;";
compilationDefines["APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"] =
"{"
"pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; \\\n"
"pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; \\\n"
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}";
}
// Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread();
......@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() {
currentQueue = defaultQueue;
}
string OpenCLContext::doubleToString(double value) {
string OpenCLContext::doubleToString(double value) const {
stringstream s;
s.precision(useDoublePrecision ? 16 : 8);
s << scientific << value;
......@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) {
return s.str();
}
string OpenCLContext::intToString(int value) {
string OpenCLContext::intToString(int value) const {
stringstream s;
s << value;
return s.str();
......@@ -1039,16 +1087,21 @@ void OpenCLContext::reorderAtomsImpl() {
// Move each molecule position into the same box.
for (int i = 0; i < numMolecules; i++) {
int xcell = (int) floor(molPos[i].x*invPeriodicBoxSizeDouble.x);
int ycell = (int) floor(molPos[i].y*invPeriodicBoxSizeDouble.y);
int zcell = (int) floor(molPos[i].z*invPeriodicBoxSizeDouble.z);
Real dx = xcell*periodicBoxSizeDouble.x;
Real dy = ycell*periodicBoxSizeDouble.y;
Real dz = zcell*periodicBoxSizeDouble.z;
if (dx != 0.0f || dy != 0.0f || dz != 0.0f) {
molPos[i].x -= dx;
molPos[i].y -= dy;
molPos[i].z -= dz;
Real4 center = molPos[i];
int zcell = (int) floor(center.z*invPeriodicBoxSize.z);
center.x -= zcell*periodicBoxVecZ.x;
center.y -= zcell*periodicBoxVecZ.y;
center.z -= zcell*periodicBoxVecZ.z;
int ycell = (int) floor(center.y*invPeriodicBoxSize.y);
center.x -= ycell*periodicBoxVecY.x;
center.y -= ycell*periodicBoxVecY.y;
int xcell = (int) floor(center.x*invPeriodicBoxSize.x);
center.x -= xcell*periodicBoxVecX.x;
if (xcell != 0 || ycell != 0 || zcell != 0) {
Real dx = molPos[i].x-center.x;
Real dy = molPos[i].y-center.y;
Real dz = molPos[i].z-center.z;
molPos[i] = center;
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom];
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -71,11 +71,21 @@ static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int ind
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
}
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision())
kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
else
kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision()) {
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxSizeDouble());
kernel.setArg<mm_double4>(index++, cl.getInvPeriodicBoxSizeDouble());
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecXDouble());
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecYDouble());
kernel.setArg<mm_double4>(index, cl.getPeriodicBoxVecZDouble());
}
else {
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxSize());
kernel.setArg<mm_float4>(index++, cl.getInvPeriodicBoxSize());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecX());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecY());
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxVecZ());
}
}
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
......@@ -161,14 +171,15 @@ void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3
const vector<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
mm_double4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else if (cl.getUseMixedPrecision()) {
......@@ -180,7 +191,7 @@ void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3
mm_float4 pos1 = posq[i];
mm_float4 pos2 = posCorrection[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3((double)pos1.x+(double)pos2.x-offset.x*periodicBoxSize.x, (double)pos1.y+(double)pos2.y-offset.y*periodicBoxSize.y, (double)pos1.z+(double)pos2.z-offset.z*periodicBoxSize.z);
positions[order[i]] = Vec3((double)pos1.x+(double)pos2.x, (double)pos1.y+(double)pos2.y, (double)pos1.z+(double)pos2.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else {
......@@ -189,7 +200,7 @@ void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3
for (int i = 0; i < numParticles; ++i) {
mm_float4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
}
......@@ -324,20 +335,17 @@ void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>&
}
void OpenCLUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
mm_double4 box = cl.getPeriodicBoxSizeDouble();
a = Vec3(box.x, 0, 0);
b = Vec3(0, box.y, 0);
c = Vec3(0, 0, box.z);
cl.getPeriodicBoxVectors(a, b, c);
}
void OpenCLUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
vector<OpenCLContext*>& contexts = cl.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(a[0], b[1], c[2]);
contexts[i]->setPeriodicBoxVectors(a, b, c);
}
void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
int version = 1;
int version = 2;
stream.write((char*) &version, sizeof(int));
int precision = (cl.getUseDoublePrecision() ? 2 : cl.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int));
......@@ -358,8 +366,9 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
stream.write(buffer, cl.getVelm().getSize()*cl.getVelm().getElementSize());
stream.write((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
stream.write((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
mm_float4 box = cl.getPeriodicBoxSize();
stream.write((char*) &box, sizeof(mm_float4));
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
stream.write((char*) boxVectors, 3*sizeof(Vec3));
cl.getIntegrationUtilities().createCheckpoint(stream);
SimTKOpenMMUtilities::createCheckpoint(stream);
}
......@@ -367,7 +376,7 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
int version;
stream.read((char*) &version, sizeof(int));
if (version != 1)
if (version != 2)
throw OpenMMException("Checkpoint was created with a different version of OpenMM");
int precision;
stream.read((char*) &precision, sizeof(int));
......@@ -397,10 +406,10 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
stream.read((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
cl.getAtomIndexArray().upload(cl.getAtomIndex());
stream.read((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
mm_float4 box;
stream.read((char*) &box, sizeof(mm_float4));
Vec3 boxVectors[3];
stream.read((char*) &boxVectors, 3*sizeof(Vec3));
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z);
contexts[i]->setPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
cl.getIntegrationUtilities().loadCheckpoint(stream);
SimTKOpenMMUtilities::loadCheckpoint(stream);
for (int i = 0; i < (int) cl.getReorderListeners().size(); i++)
......@@ -1364,8 +1373,8 @@ public:
PmePreComputation(OpenCLContext& cl, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : cl(cl), pme(pme), io(io) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
Vec3 boxSize(cl.getPeriodicBoxSize().x, cl.getPeriodicBoxSize().y, cl.getPeriodicBoxSize().z);
pme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, boxSize, includeEnergy);
Vec3 boxVectors[3] = {Vec3(cl.getPeriodicBoxSize().x, 0, 0), Vec3(0, cl.getPeriodicBoxSize().y, 0), Vec3(0, 0, cl.getPeriodicBoxSize().z)};
pme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, boxVectors, includeEnergy);
}
private:
OpenCLContext& cl;
......@@ -1772,7 +1781,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(5, pmeAtomGridIndex->getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(7, pmeAtomGridIndex->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid->getDeviceBuffer());
......@@ -1801,44 +1810,105 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (pmeGrid != NULL && includeReciprocal) {
if (usePmeQueue)
cl.setQueue(pmeQueue);
// Invert the periodic box vectors.
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
double scale = 1.0/determinant;
mm_double4 recipBoxVectors[3];
recipBoxVectors[0] = mm_double4(boxVectors[1][1]*boxVectors[2][2]*scale, 0, 0, 0);
recipBoxVectors[1] = mm_double4(-boxVectors[1][0]*boxVectors[2][2]*scale, boxVectors[0][0]*boxVectors[2][2]*scale, 0, 0);
recipBoxVectors[2] = mm_double4((boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0])*scale, -boxVectors[0][0]*boxVectors[2][1]*scale, boxVectors[0][0]*boxVectors[1][1]*scale, 0);
mm_float4 recipBoxVectorsFloat[3];
for (int i = 0; i < 3; i++)
recipBoxVectorsFloat[i] = mm_float4((float) recipBoxVectors[i].x, (float) recipBoxVectors[i].y, (float) recipBoxVectors[i].z, 0);
// Execute the reciprocal space kernels.
setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4);
setInvPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeUpdateBsplinesKernel.setArg<mm_double4>(5, recipBoxVectors[0]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(6, recipBoxVectors[1]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(7, recipBoxVectors[2]);
}
else {
pmeUpdateBsplinesKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setInvPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 6);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(*pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setInvPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 6);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(7, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(8, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
}
else {
setPeriodicBoxSizeArg(cl, pmeAtomRangeKernel, 3);
setInvPeriodicBoxSizeArg(cl, pmeAtomRangeKernel, 4);
cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeZIndexKernel, 2);
setInvPeriodicBoxSizeArg(cl, pmeZIndexKernel, 3);
if (cl.getUseDoublePrecision())
pmeZIndexKernel.setArg<mm_double4>(3, recipBoxVectors[2]);
else
pmeZIndexKernel.setArg<mm_float4>(3, recipBoxVectorsFloat[2]);
cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
}
fft->execFFT(*pmeGrid, *pmeGrid2, true);
setInvPeriodicBoxSizeArg(cl, pmeConvolutionKernel, 5);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
double scaleFactor = 1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z);
if (cl.getUseDoublePrecision())
pmeConvolutionKernel.setArg<cl_double>(6, scaleFactor);
else
pmeConvolutionKernel.setArg<cl_float>(6, (float) scaleFactor);
if (cl.getUseDoublePrecision()) {
pmeConvolutionKernel.setArg<mm_double4>(5, recipBoxVectors[0]);
pmeConvolutionKernel.setArg<mm_double4>(6, recipBoxVectors[1]);
pmeConvolutionKernel.setArg<mm_double4>(7, recipBoxVectors[2]);
pmeConvolutionKernel.setArg<cl_double>(8, scaleFactor);
}
else {
pmeConvolutionKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]);
pmeConvolutionKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]);
pmeConvolutionKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
pmeConvolutionKernel.setArg<cl_float>(8, (float) scaleFactor);
}
cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms());
fft->execFFT(*pmeGrid2, *pmeGrid, false);
setPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 3);
setInvPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 4);
if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
pmeInterpolateForceKernel.setArg<mm_double4>(5, recipBoxVectors[1]);
pmeInterpolateForceKernel.setArg<mm_double4>(6, recipBoxVectors[2]);
}
else {
pmeInterpolateForceKernel.setArg<mm_float4>(4, recipBoxVectorsFloat[0]);
pmeInterpolateForceKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[1]);
pmeInterpolateForceKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[2]);
}
if (deviceIsCpu)
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else
......@@ -2390,8 +2460,8 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData->getDeviceBuffer());
setPeriodicBoxSizeArg(cl, interactionGroupKernel, index++);
setInvPeriodicBoxSizeArg(cl, interactionGroupKernel, index++);
setPeriodicBoxArgs(cl, interactionGroupKernel, index);
index += 5;
for (int i = 0; i < (int) params->getBuffers().size(); i++)
interactionGroupKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
if (globals != NULL)
......@@ -2563,7 +2633,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if (nb.getUseCutoff()) {
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed.
index += 5; // The periodic box size arguments are set when the kernel is executed.
computeBornSumKernel.setArg<cl_uint>(index++, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
......@@ -2582,7 +2652,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed.
index += 5; // The periodic box size arguments are set when the kernel is executed.
force1Kernel.setArg<cl_uint>(index++, maxTiles);
force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
......@@ -2615,18 +2685,16 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain->getDeviceBuffer());
}
if (nb.getUseCutoff()) {
setPeriodicBoxSizeArg(cl, computeBornSumKernel, 5);
setInvPeriodicBoxSizeArg(cl, computeBornSumKernel, 6);
setPeriodicBoxSizeArg(cl, force1Kernel, 7);
setInvPeriodicBoxSizeArg(cl, force1Kernel, 8);
setPeriodicBoxArgs(cl, computeBornSumKernel, 5);
setPeriodicBoxArgs(cl, force1Kernel, 7);
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
computeBornSumKernel.setArg<cl::Buffer>(3, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl_uint>(7, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(10, nb.getInteractingAtoms().getDeviceBuffer());
computeBornSumKernel.setArg<cl_uint>(10, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(5, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl_uint>(9, maxTiles);
force1Kernel.setArg<cl::Buffer>(12, nb.getInteractingAtoms().getDeviceBuffer());
force1Kernel.setArg<cl_uint>(12, maxTiles);
force1Kernel.setArg<cl::Buffer>(15, nb.getInteractingAtoms().getDeviceBuffer());
}
}
cl.executeKernel(computeBornSumKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
......@@ -3440,7 +3508,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (nb.getUseCutoff()) {
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // Periodic box size arguments are set when the kernel is executed.
index += 5; // Periodic box size arguments are set when the kernel is executed.
pairValueKernel.setArg<cl_uint>(index++, maxTiles);
pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
......@@ -3483,7 +3551,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (nb.getUseCutoff()) {
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // Periodic box size arguments are set when the kernel is executed.
index += 5; // Periodic box size arguments are set when the kernel is executed.
pairEnergyKernel.setArg<cl_uint>(index++, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
......@@ -3567,18 +3635,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
globals->upload(globalParamValues);
}
if (nb.getUseCutoff()) {
setPeriodicBoxSizeArg(cl, pairValueKernel, 8);
setInvPeriodicBoxSizeArg(cl, pairValueKernel, 9);
setPeriodicBoxSizeArg(cl, pairEnergyKernel, 9);
setInvPeriodicBoxSizeArg(cl, pairEnergyKernel, 10);
setPeriodicBoxArgs(cl, pairValueKernel, 8);
setPeriodicBoxArgs(cl, pairEnergyKernel, 9);
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
pairValueKernel.setArg<cl::Buffer>(6, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl_uint>(10, maxTiles);
pairValueKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer());
pairValueKernel.setArg<cl_uint>(13, maxTiles);
pairValueKernel.setArg<cl::Buffer>(16, nb.getInteractingAtoms().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(7, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl_uint>(11, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(14, nb.getInteractingAtoms().getDeviceBuffer());
pairEnergyKernel.setArg<cl_uint>(14, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(17, nb.getInteractingAtoms().getDeviceBuffer());
}
}
cl.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
......@@ -4216,7 +4282,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
donorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices->getDeviceBuffer());
donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 2; // Periodic box size arguments are set when the kernel is executed.
index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals != NULL)
donorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
......@@ -4238,7 +4304,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
acceptorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices->getDeviceBuffer());
acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 2; // Periodic box size arguments are set when the kernel is executed.
index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals != NULL)
acceptorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
......@@ -4252,11 +4318,9 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
acceptorKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
}
setPeriodicBoxSizeArg(cl, donorKernel, 8);
setInvPeriodicBoxSizeArg(cl, donorKernel, 9);
setPeriodicBoxArgs(cl, donorKernel, 8);
cl.executeKernel(donorKernel, max(numDonors, numAcceptors));
setPeriodicBoxSizeArg(cl, acceptorKernel, 8);
setInvPeriodicBoxSizeArg(cl, acceptorKernel, 9);
setPeriodicBoxArgs(cl, acceptorKernel, 8);
cl.executeKernel(acceptorKernel, max(numDonors, numAcceptors));
return 0.0;
}
......@@ -4843,7 +4907,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName);
}
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
......@@ -4857,11 +4921,11 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName2);
}
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
......@@ -4878,15 +4942,15 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName3);
}
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
......@@ -5056,7 +5120,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
if (!centralParticleMode) {
for (int i = 1; i < particlesPerSet; i++) {
for (int j = i+1; j < particlesPerSet; j++)
verifyCutoff<<"includeInteraction &= (delta(pos"<<(i+1)<<", pos"<<(j+1)<<", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);\n";
verifyCutoff<<"includeInteraction &= (delta(pos"<<(i+1)<<", pos"<<(j+1)<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ).w < CUTOFF_SQUARED);\n";
}
}
}
......@@ -5129,8 +5193,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
forceKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
setPeriodicBoxSizeArg(cl, forceKernel, index++);
setInvPeriodicBoxSizeArg(cl, forceKernel, index++);
setPeriodicBoxArgs(cl, forceKernel, index);
index += 5;
if (nonbondedMethod != NoCutoff) {
forceKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex->getDeviceBuffer());
......@@ -5157,8 +5221,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
// Set arguments for the block bounds kernel.
index = 0;
setPeriodicBoxSizeArg(cl, blockBoundsKernel, index++);
setInvPeriodicBoxSizeArg(cl, blockBoundsKernel, index++);
setPeriodicBoxArgs(cl, blockBoundsKernel, index);
index += 5;
blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer());
......@@ -5167,8 +5231,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
// Set arguments for the neighbor list kernel.
index = 0;
setPeriodicBoxSizeArg(cl, neighborsKernel, index++);
setInvPeriodicBoxSizeArg(cl, neighborsKernel, index++);
setPeriodicBoxArgs(cl, neighborsKernel, index);
index += 5;
neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer());
......@@ -5214,7 +5278,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
int* numPairs = (int*) cl.getPinnedBuffer();
cl::Event event;
if (nonbondedMethod != NoCutoff) {
neighborsKernel.setArg<int>(8, maxNeighborPairs);
neighborsKernel.setArg<int>(11, maxNeighborPairs);
startIndicesKernel.setArg<int>(3, maxNeighborPairs);
copyPairsKernel.setArg<int>(3, maxNeighborPairs);
cl.executeKernel(blockBoundsKernel, cl.getNumAtomBlocks());
......@@ -5244,8 +5308,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs = OpenCLArray::create<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors = OpenCLArray::create<int>(cl, maxNeighborPairs, "customManyParticleNeighbors");
forceKernel.setArg<cl::Buffer>(5, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(5, neighborPairs->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(8, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, neighborPairs->getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs->getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(1, neighbors->getDeviceBuffer());
continue;
......@@ -6641,17 +6705,16 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
// Initialize the kernel arguments.
kernel.setArg<cl_int>(3, numMolecules);
kernel.setArg<cl::Buffer>(6, cl.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(7, moleculeAtoms->getDeviceBuffer());
kernel.setArg<cl::Buffer>(8, moleculeStartIndex->getDeviceBuffer());
kernel.setArg<cl::Buffer>(9, cl.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(10, moleculeAtoms->getDeviceBuffer());
kernel.setArg<cl::Buffer>(11, moleculeStartIndex->getDeviceBuffer());
}
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions->getDeviceBuffer(), 0, 0, bytesToCopy);
kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(2, (cl_float) scaleZ);
setPeriodicBoxSizeArg(cl, kernel, 4);
setInvPeriodicBoxSizeArg(cl, kernel, 5);
setPeriodicBoxArgs(cl, kernel, 4);
cl.executeKernel(kernel, cl.getNumAtoms());
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
cl::Program interactingBlocksProgram = context.createProgram(file, defines);
findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms());
findBlockBoundsKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(4, blockCenter->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(5, blockBoundingBox->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(6, rebuildNeighborList->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(7, sortedBlocks->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(6, context.getPosq().getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(7, blockCenter->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(8, blockBoundingBox->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(9, rebuildNeighborList->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(10, sortedBlocks->getDeviceBuffer());
sortBoxDataKernel = cl::Kernel(interactingBlocksProgram, "sortBoxData");
sortBoxDataKernel.setArg<cl::Buffer>(0, sortedBlocks->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(1, blockCenter->getDeviceBuffer());
......@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortBoxDataKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(8, rebuildNeighborList->getDeviceBuffer());
findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractingBlocksKernel.setArg<cl::Buffer>(2, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(3, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(4, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(5, context.getPosq().getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(6, interactingTiles->getSize());
findInteractingBlocksKernel.setArg<cl_uint>(7, startBlockIndex);
findInteractingBlocksKernel.setArg<cl_uint>(8, numBlocks);
findInteractingBlocksKernel.setArg<cl::Buffer>(9, sortedBlocks->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(10, sortedBlockCenter->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(11, sortedBlockBoundingBox->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(12, exclusionIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(13, exclusionRowIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(14, oldPositions->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(15, rebuildNeighborList->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(8, context.getPosq().getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(9, interactingTiles->getSize());
findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
findInteractingBlocksKernel.setArg<cl::Buffer>(12, sortedBlocks->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(13, sortedBlockCenter->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(14, sortedBlockBoundingBox->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(15, exclusionIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(16, exclusionRowIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(17, oldPositions->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList->getDeviceBuffer());
if (findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) {
// The device can't handle this block size, so reduce it.
......@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
}
}
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision())
kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble());
else
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
}
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision())
kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
else
kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision()) {
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxSizeDouble());
kernel.setArg<mm_double4>(index++, cl.getInvPeriodicBoxSizeDouble());
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecXDouble());
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecYDouble());
kernel.setArg<mm_double4>(index, cl.getPeriodicBoxVecZDouble());
}
else {
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxSize());
kernel.setArg<mm_float4>(index++, cl.getInvPeriodicBoxSize());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecX());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecY());
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxVecZ());
}
}
void OpenCLNonbondedUtilities::prepareInteractions() {
......@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
setPeriodicBoxSizeArg(context, findBlockBoundsKernel, 1);
setInvPeriodicBoxSizeArg(context, findBlockBoundsKernel, 2);
setPeriodicBoxArgs(context, findBlockBoundsKernel, 1);
context.executeKernel(findBlockBoundsKernel, context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
context.executeKernel(sortBoxDataKernel, context.getNumAtoms());
setPeriodicBoxSizeArg(context, findInteractingBlocksKernel, 0);
setInvPeriodicBoxSizeArg(context, findInteractingBlocksKernel, 1);
setPeriodicBoxArgs(context, findInteractingBlocksKernel, 0);
context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
}
void OpenCLNonbondedUtilities::computeInteractions() {
if (kernelSource.size() > 0) {
if (useCutoff) {
setPeriodicBoxSizeArg(context, forceKernel, 9);
setInvPeriodicBoxSizeArg(context, forceKernel, 10);
}
if (useCutoff)
setPeriodicBoxArgs(context, forceKernel, 9);
context.executeKernel(forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (context.getComputeForceCount() == 1)
updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
......@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(11, maxTiles);
forceKernel.setArg<cl::Buffer>(14, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(3, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(4, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(6, maxTiles);
forceKernel.setArg<cl_uint>(14, maxTiles);
forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
int numAtoms = context.getNumAtoms();
if (context.getUseDoublePrecision()) {
vector<mm_double4> oldPositionsVec(numAtoms, mm_double4(1e30, 1e30, 1e30, 0));
......@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
forceKernel.setArg<cl_uint>(5, startTileIndex);
forceKernel.setArg<cl_uint>(6, numTiles);
findInteractingBlocksKernel.setArg<cl_uint>(7, startBlockIndex);
findInteractingBlocksKernel.setArg<cl_uint>(8, numBlocks);
findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
}
}
......@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
if (useCutoff) {
kernel.setArg<cl::Buffer>(index++, interactingTiles->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, interactionCount->getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed.
index += 5; // The periodic box size arguments are set when the kernel is executed.
kernel.setArg<cl_uint>(index++, interactingTiles->getSize());
kernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer());
......
......@@ -20,8 +20,9 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
#endif
......@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -110,7 +111,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -266,10 +267,8 @@ __kernel void computeN2Energy(
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
local_posq[get_local_id(0)].x -= floor((local_posq[get_local_id(0)].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
local_posq[get_local_id(0)].y -= floor((local_posq[get_local_id(0)].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
local_posq[get_local_id(0)].z -= floor((local_posq[get_local_id(0)].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[get_local_id(0)], blockCenterX)
SYNC_WARPS;
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
......@@ -310,7 +309,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
......@@ -20,8 +20,9 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
#endif
......@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
......@@ -126,7 +127,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
......@@ -272,7 +273,7 @@ __kernel void computeN2Energy(
real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz -= floor((local_posq[tgx].xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[tgx], blockCenterX)
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0;
......@@ -332,7 +333,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
......
......@@ -14,8 +14,9 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif
__local real* restrict local_value,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
#endif
......@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
local_posq[get_local_id(0)].x -= floor((local_posq[get_local_id(0)].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
local_posq[get_local_id(0)].y -= floor((local_posq[get_local_id(0)].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
local_posq[get_local_id(0)].z -= floor((local_posq[get_local_id(0)].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[get_local_id(0)], blockCenterX)
SYNC_WARPS;
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
......@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
......@@ -14,8 +14,9 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif
__local real* restrict local_value,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
#endif
......@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
......@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
......@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz -= floor((local_posq[tgx].xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[tgx], blockCenterX)
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx;
real value = 0;
......@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
......
......@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) {
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
*/
real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
#ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(result)
#endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
......@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors.
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
real energy = 0;
real4 f1 = (real4) 0;
......@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
......@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
* Compute forces on acceptors.
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
......@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
real4 d1 = posBuffer[3*index];
real4 d2 = posBuffer[3*index+1];
real4 d3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
......
......@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
*/
inline real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
inline real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(result)
#endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
......@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*/
__kernel void computeInteraction(
__global long* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#ifdef USE_CUTOFF
, __global const int* restrict neighbors, __global const int* restrict neighborStartIndex
#endif
......@@ -138,16 +136,14 @@ __kernel void computeInteraction(
/**
* Find a bounding box for the atoms in each block.
*/
__kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const real4* restrict posq,
__global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict numNeighborPairs) {
__kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const real4* restrict posq, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict numNeighborPairs) {
int index = get_global_id(0);
int base = index*TILE_SIZE;
while (base < NUM_ATOMS) {
real4 pos = posq[base];
#ifdef USE_PERIODIC
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS(pos)
#endif
real4 minPos = pos;
real4 maxPos = pos;
......@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
pos = posq[i];
#ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos);
pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
#endif
minPos = (real4) (min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0);
maxPos = (real4) (max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0);
......@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
/**
* Find a list of neighbors for each atom.
*/
__kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const real4* restrict posq,
__global const real4* restrict blockCenter, __global const real4* restrict blockBoundingBox, __global int2* restrict neighborPairs,
__kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const real4* restrict posq, __global const real4* restrict blockCenter, __global const real4* restrict blockBoundingBox, __global int2* restrict neighborPairs,
__global int* restrict numNeighborPairs, __global int* restrict numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS
, __global int* restrict exclusions, __global int* restrict exclusionStartIndex
......@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
real4 blockSize2 = blockBoundingBox[block2];
real4 blockDelta = blockCenter1-blockCenter2;
#ifdef USE_PERIODIC
blockDelta.x -= floor(blockDelta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
blockDelta.y -= floor(blockDelta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
blockDelta.z -= floor(blockDelta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSize1.x-blockSize2.x);
blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSize1.y-blockSize2.y);
......@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
// Decide whether to include this atom pair in the neighbor list.
real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize);
real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CENTRAL_PARTICLE
bool includeAtom = (atom2 != atom1 && atom2 < NUM_ATOMS && atomDelta.w < CUTOFF_SQUARED);
#else
......
......@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups(
#else
__global real4* restrict forceBuffers,
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq,
__global const int4* restrict groupData, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict groupData,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
......@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups(
posq2 = (real4) (localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment