Commit 6eb302fb authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement CUDA support for triclinic boxes

parent 61458a5f
......@@ -2380,6 +2380,9 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs.push_back(&interactionGroupData->getDevicePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecYPointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
interactionGroupArgs.push_back(&params->getBuffers()[i].getMemory());
if (globals != NULL)
......@@ -2539,6 +2542,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
computeSumArgs.push_back(cu.getPeriodicBoxSizePointer());
computeSumArgs.push_back(cu.getInvPeriodicBoxSizePointer());
computeSumArgs.push_back(cu.getPeriodicBoxVecXPointer());
computeSumArgs.push_back(cu.getPeriodicBoxVecYPointer());
computeSumArgs.push_back(cu.getPeriodicBoxVecZPointer());
computeSumArgs.push_back(&maxTiles);
computeSumArgs.push_back(&nb.getBlockCenters().getDevicePointer());
computeSumArgs.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
......@@ -2558,6 +2564,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
force1Args.push_back(&nb.getInteractionCount().getDevicePointer());
force1Args.push_back(cu.getPeriodicBoxSizePointer());
force1Args.push_back(cu.getInvPeriodicBoxSizePointer());
force1Args.push_back(cu.getPeriodicBoxVecXPointer());
force1Args.push_back(cu.getPeriodicBoxVecYPointer());
force1Args.push_back(cu.getPeriodicBoxVecZPointer());
force1Args.push_back(&maxTiles);
force1Args.push_back(&nb.getBlockCenters().getDevicePointer());
force1Args.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
......@@ -2574,8 +2583,8 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
maxTiles = nb.getInteractingTiles().getSize();
computeSumArgs[3] = &nb.getInteractingTiles().getDevicePointer();
force1Args[5] = &nb.getInteractingTiles().getDevicePointer();
computeSumArgs[10] = &nb.getInteractingAtoms().getDevicePointer();
force1Args[12] = &nb.getInteractingAtoms().getDevicePointer();
computeSumArgs[13] = &nb.getInteractingAtoms().getDevicePointer();
force1Args[15] = &nb.getInteractingAtoms().getDevicePointer();
}
}
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
......@@ -3344,6 +3353,9 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
pairValueArgs.push_back(cu.getPeriodicBoxSizePointer());
pairValueArgs.push_back(cu.getInvPeriodicBoxSizePointer());
pairValueArgs.push_back(cu.getPeriodicBoxVecXPointer());
pairValueArgs.push_back(cu.getPeriodicBoxVecYPointer());
pairValueArgs.push_back(cu.getPeriodicBoxVecZPointer());
pairValueArgs.push_back(&maxTiles);
pairValueArgs.push_back(&nb.getBlockCenters().getDevicePointer());
pairValueArgs.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
......@@ -3379,6 +3391,9 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
pairEnergyArgs.push_back(&nb.getInteractionCount().getDevicePointer());
pairEnergyArgs.push_back(cu.getPeriodicBoxSizePointer());
pairEnergyArgs.push_back(cu.getInvPeriodicBoxSizePointer());
pairEnergyArgs.push_back(cu.getPeriodicBoxVecXPointer());
pairEnergyArgs.push_back(cu.getPeriodicBoxVecYPointer());
pairEnergyArgs.push_back(cu.getPeriodicBoxVecZPointer());
pairEnergyArgs.push_back(&maxTiles);
pairEnergyArgs.push_back(&nb.getBlockCenters().getDevicePointer());
pairEnergyArgs.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
......@@ -3444,8 +3459,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
maxTiles = nb.getInteractingTiles().getSize();
pairValueArgs[4] = &nb.getInteractingTiles().getDevicePointer();
pairEnergyArgs[5] = &nb.getInteractingTiles().getDevicePointer();
pairValueArgs[11] = &nb.getInteractingAtoms().getDevicePointer();
pairEnergyArgs[12] = &nb.getInteractingAtoms().getDevicePointer();
pairValueArgs[14] = &nb.getInteractingAtoms().getDevicePointer();
pairEnergyArgs[15] = &nb.getInteractingAtoms().getDevicePointer();
}
}
cu.executeKernel(pairValueKernel, &pairValueArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
......@@ -4061,6 +4076,9 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
donorArgs.push_back(&acceptors->getDevicePointer());
donorArgs.push_back(cu.getPeriodicBoxSizePointer());
donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
donorArgs.push_back(cu.getPeriodicBoxVecXPointer());
donorArgs.push_back(cu.getPeriodicBoxVecYPointer());
donorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals != NULL)
donorArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
......@@ -4082,6 +4100,9 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
acceptorArgs.push_back(&acceptors->getDevicePointer());
acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer());
acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (globals != NULL)
acceptorArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
......@@ -4684,7 +4705,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName);
}
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
......@@ -4698,11 +4719,11 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName2);
}
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
......@@ -4719,15 +4740,15 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<", periodicBoxSize, invPeriodicBoxSize);\n";
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName3);
}
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
......@@ -4897,7 +4918,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
if (!centralParticleMode) {
for (int i = 1; i < particlesPerSet; i++) {
for (int j = i+1; j < particlesPerSet; j++)
verifyCutoff<<"includeInteraction &= (delta(pos"<<(i+1)<<", pos"<<(j+1)<<", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);\n";
verifyCutoff<<"includeInteraction &= (delta(pos"<<(i+1)<<", pos"<<(j+1)<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ).w < CUTOFF_SQUARED);\n";
}
}
}
......@@ -4975,6 +4996,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs.push_back(&cu.getPosq().getDevicePointer());
forceArgs.push_back(cu.getPeriodicBoxSizePointer());
forceArgs.push_back(cu.getInvPeriodicBoxSizePointer());
forceArgs.push_back(cu.getPeriodicBoxVecXPointer());
forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
if (nonbondedMethod != NoCutoff) {
forceArgs.push_back(&neighbors->getDevicePointer());
forceArgs.push_back(&neighborStartIndex->getDevicePointer());
......@@ -5000,6 +5024,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
blockBoundsArgs.push_back(cu.getPeriodicBoxSizePointer());
blockBoundsArgs.push_back(cu.getInvPeriodicBoxSizePointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecXPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
blockBoundsArgs.push_back(&blockCenter->getDevicePointer());
blockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer());
......@@ -5009,6 +5036,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
neighborsArgs.push_back(cu.getPeriodicBoxSizePointer());
neighborsArgs.push_back(cu.getInvPeriodicBoxSizePointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecXPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
neighborsArgs.push_back(&cu.getPosq().getDevicePointer());
neighborsArgs.push_back(&blockCenter->getDevicePointer());
neighborsArgs.push_back(&blockBoundingBox->getDevicePointer());
......@@ -6413,6 +6443,7 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d
float scalefY = (float) scaleY;
float scalefZ = (float) scaleZ;
void* args[] = {&scalefX, &scalefY, &scalefZ, &numMolecules, cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(),
cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&cu.getPosq().getDevicePointer(), &moleculeAtoms->getDevicePointer(), &moleculeStartIndex->getDevicePointer()};
cu.executeKernel(kernel, args, cu.getNumAtoms());
for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++)
......
......@@ -17,7 +17,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else
unsigned int numTiles
#endif
......@@ -56,9 +57,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -109,9 +108,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -254,12 +251,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
pos1.x -= floor((pos1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
pos1.y -= floor((pos1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos1.z -= floor((pos1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].pos.x -= floor((localData[threadIdx.x].pos.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].pos.y -= floor((localData[threadIdx.x].pos.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].pos.z -= floor((localData[threadIdx.x].pos.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x].pos, blockCenterX)
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj;
......@@ -306,9 +299,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
......@@ -14,7 +14,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else
unsigned int numTiles
#endif
......@@ -51,9 +52,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -100,9 +99,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -229,12 +226,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
pos1.x -= floor((pos1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
pos1.y -= floor((pos1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos1.z -= floor((pos1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].pos.x -= floor((localData[threadIdx.x].pos.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].pos.y -= floor((localData[threadIdx.x].pos.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].pos.z -= floor((localData[threadIdx.x].pos.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x].pos, blockCenterX)
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj;
......@@ -268,9 +261,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
......@@ -25,12 +25,10 @@ inline __device__ real4 delta(real4 vec1, real4 vec2) {
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
*/
inline __device__ real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
inline __device__ real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(result)
#endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
......@@ -69,7 +67,8 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors.
*/
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
extern __shared__ real4 posBuffer[];
real energy = 0;
......@@ -116,7 +115,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
......@@ -157,7 +156,8 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
* Compute forces on acceptors.
*/
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
extern __shared__ real4 posBuffer[];
real3 f1 = make_real3(0);
......@@ -203,7 +203,7 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
real4 d1 = posBuffer[3*index];
real4 d2 = posBuffer[3*index+1];
real4 d3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
......
......@@ -18,12 +18,10 @@ inline __device__ real3 trim(real4 v) {
* Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude.
*/
inline __device__ real4 delta(real3 vec1, real3 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
inline __device__ real4 delta(real3 vec1, real3 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(result)
#endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
......@@ -81,7 +79,7 @@ __constant__ float globals[NUM_GLOBALS];
*/
extern "C" __global__ void computeInteraction(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#ifdef USE_CUTOFF
, const int* __restrict__ neighbors, const int* __restrict__ neighborStartIndex
#endif
......@@ -144,16 +142,14 @@ extern "C" __global__ void computeInteraction(
/**
* Find a bounding box for the atoms in each block.
*/
extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ posq,
real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ numNeighborPairs) {
extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
const real4* __restrict__ posq, real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ numNeighborPairs) {
int index = blockIdx.x*blockDim.x+threadIdx.x;
int base = index*TILE_SIZE;
while (base < NUM_ATOMS) {
real4 pos = posq[base];
#ifdef USE_PERIODIC
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x;
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS(pos)
#endif
real4 minPos = pos;
real4 maxPos = pos;
......@@ -162,9 +158,7 @@ extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPerio
pos = posq[i];
#ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos);
pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
#endif
minPos = make_real4(min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0);
maxPos = make_real4(max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0);
......@@ -182,8 +176,8 @@ extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPerio
/**
* Find a list of neighbors for each atom.
*/
extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ posq,
const real4* __restrict__ blockCenter, const real4* __restrict__ blockBoundingBox, int2* __restrict__ neighborPairs,
extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
const real4* __restrict__ posq, const real4* __restrict__ blockCenter, const real4* __restrict__ blockBoundingBox, int2* __restrict__ neighborPairs,
int* __restrict__ numNeighborPairs, int* __restrict__ numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS
, int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex
......@@ -216,9 +210,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
real4 blockSize2 = blockBoundingBox[block2];
real4 blockDelta = blockCenter1-blockCenter2;
#ifdef USE_PERIODIC
blockDelta.x -= floor(blockDelta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
blockDelta.y -= floor(blockDelta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
blockDelta.z -= floor(blockDelta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSize1.x-blockSize2.x);
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSize1.y-blockSize2.y);
......@@ -247,7 +239,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
// Decide whether to include this atom pair in the neighbor list.
real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize);
real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CENTRAL_PARTICLE
bool includeAtom = (atom2 != atom1 && atom2 < NUM_ATOMS && atomDelta.w < CUTOFF_SQUARED);
#else
......
......@@ -10,7 +10,7 @@ typedef struct {
extern "C" __global__ void computeInteractionGroups(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ groupData,
real4 periodicBoxSize, real4 invPeriodicBoxSize
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex
......@@ -47,9 +47,7 @@ extern "C" __global__ void computeInteractionGroups(
posq2 = make_real4(localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
......@@ -69,7 +69,8 @@ typedef struct {
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else
unsigned int numTiles,
#endif
......@@ -104,9 +105,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+j].x-posq1.x, localData[tbx+j].y-posq1.y, localData[tbx+j].z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -151,9 +150,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -291,12 +288,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
posq1.x -= floor((posq1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
posq1.y -= floor((posq1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
posq1.z -= floor((posq1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].x -= floor((localData[threadIdx.x].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].y -= floor((localData[threadIdx.x].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].z -= floor((localData[threadIdx.x].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x], blockCenterX)
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z);
......@@ -342,9 +335,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for (j = 0; j < TILE_SIZE; j++) {
real3 delta = make_real3(localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[tbx+tj];
......@@ -414,7 +405,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else
unsigned int numTiles,
#endif
......@@ -451,9 +443,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 posq2 = make_real4(localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -508,9 +498,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......@@ -656,12 +644,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
posq1.x -= floor((posq1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
posq1.y -= floor((posq1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
posq1.z -= floor((posq1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].x -= floor((localData[threadIdx.x].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].y -= floor((localData[threadIdx.x].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].z -= floor((localData[threadIdx.x].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x], blockCenterX)
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = atomIndices[tbx+tj];
......@@ -713,9 +697,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4 posq2 = make_real4(localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
......
......@@ -2,7 +2,8 @@
* Scale the particle positions with each axis independent
*/
extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4* __restrict__ posq,
extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize,
real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4* __restrict__ posq,
const int* __restrict__ moleculeAtoms, const int* __restrict__ moleculeStartIndex) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numMolecules; index += blockDim.x*gridDim.x) {
int first = moleculeStartIndex[index];
......@@ -25,13 +26,9 @@ extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scal
// Move it into the first periodic box.
int xcell = (int) floor(center.x*invPeriodicBoxSize.x);
int ycell = (int) floor(center.y*invPeriodicBoxSize.y);
int zcell = (int) floor(center.z*invPeriodicBoxSize.z);
real3 delta = make_real3(xcell*periodicBoxSize.x, ycell*periodicBoxSize.y, zcell*periodicBoxSize.z);
center.x -= delta.x;
center.y -= delta.y;
center.z -= delta.z;
real3 oldCenter = center;
APPLY_PERIODIC_TO_POS(center)
real3 delta = make_real3(oldCenter.x-center.x, oldCenter.y-center.y, oldCenter.z-center.z);
// Now scale the position of the molecule center.
......
......@@ -55,7 +55,17 @@ const double TOL = 1e-5;
CudaPlatform platform;
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize) {
Vec3 computeDelta(const Vec3& pos1, const Vec3& pos2, bool periodic, const Vec3* periodicBoxVectors) {
Vec3 diff = pos1-pos2;
if (periodic) {
diff -= periodicBoxVectors[2]*floor(diff[2]/periodicBoxVectors[2][2]+0.5);
diff -= periodicBoxVectors[1]*floor(diff[1]/periodicBoxVectors[1][1]+0.5);
diff -= periodicBoxVectors[0]*floor(diff[0]/periodicBoxVectors[0][0]+0.5);
}
return diff;
}
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize, bool triclinic) {
// Create a System and Context.
int numParticles = force->getNumParticles();
......@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
Vec3 boxVectors[3];
if (triclinic) {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0.2*boxSize, boxSize, 0);
boxVectors[2] = Vec3(-0.3*boxSize, -0.1*boxSize, boxSize);
}
else {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0, boxSize, 0);
boxVectors[2] = Vec3(0, 0, boxSize);
}
system.setDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
system.addForce(force);
VerletIntegrator integrator(0.001);
Context context(system, integrator, platform);
......@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
// See if the energy matches the expected value.
double expectedEnergy = 0;
bool periodic = (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic);
for (int i = 0; i < (int) expectedSets.size(); i++) {
int p1 = expectedSets[i][0];
int p2 = expectedSets[i][1];
int p3 = expectedSets[i][2];
Vec3 d12 = positions[p2]-positions[p1];
Vec3 d13 = positions[p3]-positions[p1];
Vec3 d23 = positions[p3]-positions[p2];
if (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic) {
for (int j = 0; j < 3; j++) {
d12[j] -= floor(d12[j]/boxSize+0.5f)*boxSize;
d13[j] -= floor(d13[j]/boxSize+0.5f)*boxSize;
d23[j] -= floor(d23[j]/boxSize+0.5f)*boxSize;
}
}
Vec3 d12 = computeDelta(positions[p2], positions[p1], periodic, boxVectors);
Vec3 d13 = computeDelta(positions[p3], positions[p1], periodic, boxVectors);
Vec3 d23 = computeDelta(positions[p3], positions[p2], periodic, boxVectors);
double r12 = sqrt(d12.dot(d12));
double r13 = sqrt(d13.dot(d13));
double r23 = sqrt(d23.dot(d23));
......@@ -210,7 +225,7 @@ void testNoCutoff() {
positions.push_back(Vec3(0.4, 0, -0.8));
int sets[4][3] = {{0,1,2}, {1,2,3}, {2,3,0}, {3,0,1}};
vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, 2.0);
validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
}
void testCutoff() {
......@@ -235,7 +250,7 @@ void testCutoff() {
positions.push_back(Vec3(0.2, 0.5, -0.1));
int sets[7][3] = {{0,1,2}, {0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,2,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[7]);
validateAxilrodTeller(force, positions, expectedSets, 2.0);
validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
}
void testPeriodic() {
......@@ -261,7 +276,33 @@ void testPeriodic() {
double boxSize = 2.1;
int sets[5][3] = {{0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, boxSize);
validateAxilrodTeller(force, positions, expectedSets, boxSize, false);
}
void testTriclinic() {
CustomManyParticleForce* force = new CustomManyParticleForce(3,
"C*(1+3*cos(theta1)*cos(theta2)*cos(theta3))/(r12*r13*r23)^3;"
"theta1=angle(p1,p2,p3); theta2=angle(p2,p3,p1); theta3=angle(p3,p1,p2);"
"r12=distance(p1,p2); r13=distance(p1,p3); r23=distance(p2,p3)");
force->addGlobalParameter("C", 1.5);
force->setNonbondedMethod(CustomManyParticleForce::CutoffPeriodic);
force->setCutoffDistance(1.05);
vector<double> params;
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
vector<Vec3> positions;
positions.push_back(Vec3(0, 0, 0));
positions.push_back(Vec3(1, 0, 0));
positions.push_back(Vec3(0, 1.1, 0.3));
positions.push_back(Vec3(0.4, 0, -0.8));
positions.push_back(Vec3(0.2, 0.5, -0.1));
double boxSize = 2.1;
int sets[4][3] = {{0,1,3}, {0,1,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, boxSize, true);
}
void testExclusions() {
......@@ -286,7 +327,7 @@ void testExclusions() {
force->addExclusion(0, 3);
int sets[5][3] = {{0,1,4}, {1,2,3}, {1,2,4}, {1,3,4}, {2,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, 2.0);
validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
}
void testAllTerms() {
......@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
testNoCutoff();
testCutoff();
testPeriodic();
testTriclinic();
testExclusions();
testAllTerms();
testParameters();
......
......@@ -236,6 +236,82 @@ void testRandomSeed() {
}
}
void testTriclinic() {
const int numParticles = 64;
const int frequency = 10;
const int steps = 1000;
const double pressure = 1.5;
const double pressureInMD = pressure*(AVOGADRO*1e-25); // pressure in kJ/mol/nm^3
const double temperature = 300.0;
const double initialVolume = numParticles*BOLTZ*temperature/pressureInMD;
const double initialLength = std::pow(initialVolume, 1.0/3.0);
// Create a gas of noninteracting particles.
System system;
Vec3 initialBox[3];
initialBox[0] = Vec3(initialLength, 0, 0);
initialBox[1] = Vec3(0.2*initialLength, initialLength, 0);
initialBox[2] = Vec3(0.1*initialLength, 0.3*initialLength, initialLength);
system.setDefaultPeriodicBoxVectors(initialBox[0], initialBox[1], initialBox[2]);
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
system.addParticle(1.0);
positions[i] = Vec3(initialLength*genrand_real2(sfmt), initialLength*genrand_real2(sfmt), initialLength*genrand_real2(sfmt));
}
MonteCarloAnisotropicBarostat* barostat = new MonteCarloAnisotropicBarostat(Vec3(pressure, pressure, pressure), temperature, true, true, true, frequency);
system.addForce(barostat);
// Run a simulation
LangevinIntegrator integrator(temperature, 0.1, 0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the volume is correct.
double volume = 0.0;
for (int j = 0; j < steps; ++j) {
Vec3 box[3];
context.getState(0).getPeriodicBoxVectors(box[0], box[1], box[2]);
volume += box[0][0]*box[1][1]*box[2][2];
integrator.step(frequency);
}
volume /= steps;
double expected = (numParticles+1)*BOLTZ*temperature/pressureInMD;
ASSERT_USUALLY_EQUAL_TOL(expected, volume, 3/std::sqrt((double) steps));
// Make sure the box vectors have been scaled consistently.
State state = context.getState(State::Positions);
Vec3 box[3];
state.getPeriodicBoxVectors(box[0], box[1], box[2]);
double xscale = box[2][0]/(0.1*initialLength);
double yscale = box[2][1]/(0.3*initialLength);
double zscale = box[2][2]/(1.0*initialLength);
for (int i = 0; i < 3; i++) {
ASSERT_EQUAL_VEC(Vec3(xscale*initialBox[i][0], yscale*initialBox[i][1], zscale*initialBox[i][2]), box[i], 1e-5);
}
// The barostat should have put all particles inside the first periodic box. One integration step
// has happened since then, so they may have moved slightly outside it.
for (int i = 0; i < numParticles; i++) {
Vec3 pos = state.getPositions()[i];
ASSERT(pos[2]/box[2][2] > -1 && pos[2]/box[2][2] < 2);
pos -= box[2]*floor(pos[2]/box[2][2]);
ASSERT(pos[1]/box[1][1] > -1 && pos[1]/box[1][1] < 2);
pos -= box[1]*floor(pos[1]/box[1][1]);
ASSERT(pos[0]/box[0][0] > -1 && pos[0]/box[0][0] < 2);
}
}
/**
* Run a constant pressure simulation on an anisotropic Einstein crystal
* using isotropic and anisotropic barostats. There are a total of 15 simulations:
......@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
testIdealGasAxis(1);
testIdealGasAxis(2);
testRandomSeed();
testTriclinic();
//testEinsteinCrystal();
}
catch(const exception& e) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment