"platforms/opencl/include/OpenCLQueue.h" did not exist on "74a8266f4fbe2ac74c41ef84b47b57245e729020"
Commit 3db6b8ee authored by Jason Swails's avatar Jason Swails
Browse files

Merge branch 'master' into gbn-chk

parents 5d8e92ff 3a80b0f1
...@@ -73,6 +73,11 @@ static RealVec& extractBoxSize(ContextImpl& context) { ...@@ -73,6 +73,11 @@ static RealVec& extractBoxSize(ContextImpl& context) {
return *(RealVec*) data->periodicBoxSize; return *(RealVec*) data->periodicBoxSize;
} }
static RealVec* extractBoxVectors(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return (RealVec*) data->periodicBoxVectors;
}
static ReferenceConstraints& extractConstraints(ContextImpl& context) { static ReferenceConstraints& extractConstraints(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData()); ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *(ReferenceConstraints*) data->constraints; return *(ReferenceConstraints*) data->constraints;
...@@ -147,19 +152,36 @@ public: ...@@ -147,19 +152,36 @@ public:
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context); vector<RealVec>& posData = extractPositions(context);
RealVec boxSize = extractBoxSize(context); RealVec* boxVectors = extractBoxVectors(context);
double invBoxSize[3] = {1/boxSize[0], 1/boxSize[1], 1/boxSize[2]}; double boxSize[3] = {boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]};
double invBoxSize[3] = {1/boxVectors[0][0], 1/boxVectors[1][1], 1/boxVectors[2][2]};
bool triclinic = (boxVectors[0][1] != 0 || boxVectors[0][2] != 0 || boxVectors[1][0] != 0 || boxVectors[1][2] != 0 || boxVectors[2][0] != 0 || boxVectors[2][1] != 0);
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
int numThreads = threads.getNumThreads(); int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads; int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads; int end = (threadIndex+1)*numParticles/numThreads;
if (data.isPeriodic) if (data.isPeriodic) {
for (int i = start; i < end; i++) if (triclinic) {
for (int i = start; i < end; i++) {
RealVec pos = posData[i];
pos -= boxVectors[2]*floor(pos[2]*invBoxSize[2]);
pos -= boxVectors[1]*floor(pos[1]*invBoxSize[1]);
pos -= boxVectors[0]*floor(pos[0]*invBoxSize[0]);
posq[4*i] = (float) pos[0];
posq[4*i+1] = (float) pos[1];
posq[4*i+2] = (float) pos[2];
}
}
else {
for (int i = start; i < end; i++) {
for (int j = 0; j < 3; j++) { for (int j = 0; j < 3; j++) {
RealOpenMM x = posData[i][j]; RealOpenMM x = posData[i][j];
double base = floor(x*invBoxSize[j])*boxSize[j]; double base = floor(x*invBoxSize[j])*boxSize[j];
posq[4*i+j] = (float) (x-base); posq[4*i+j] = (float) (x-base);
} }
}
}
}
else else
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
posq[4*i] = (float) posData[i][0]; posq[4*i] = (float) posData[i][0];
...@@ -499,8 +521,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -499,8 +521,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context); vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<RealVec>& forceData = extractForces(context);
RealVec boxSize = extractBoxSize(context); RealVec* boxVectors = extractBoxVectors(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
...@@ -546,16 +567,17 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -546,16 +567,17 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
} }
} }
if (needRecompute) { if (needRecompute) {
neighborList->computeNeighborList(numParticles, posq, exclusions, floatBoxSize, data.isPeriodic, nonbondedCutoff+padding, data.threads); neighborList->computeNeighborList(numParticles, posq, exclusions, boxVectors, data.isPeriodic, nonbondedCutoff+padding, data.threads);
lastPositions = posData; lastPositions = posData;
} }
nonbonded->setUseCutoff(nonbondedCutoff, *neighborList, rfDielectric); nonbonded->setUseCutoff(nonbondedCutoff, *neighborList, rfDielectric);
} }
if (data.isPeriodic) { if (data.isPeriodic) {
RealVec* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff; double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxSize[0] < minAllowedSize || boxSize[1] < minAllowedSize || boxSize[2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
nonbonded->setPeriodic(floatBoxSize); nonbonded->setPeriodic(boxVectors);
} }
if (ewald) if (ewald)
nonbonded->setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]); nonbonded->setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
...@@ -569,8 +591,8 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -569,8 +591,8 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles); PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
Vec3 periodicBoxSize(boxSize[0], boxSize[1], boxSize[2]); Vec3 periodicBoxVectors[3] = {boxVectors[0], boxVectors[1], boxVectors[2]};
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, periodicBoxSize, includeEnergy); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, periodicBoxVectors, includeEnergy);
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io); nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
} }
else else
...@@ -582,7 +604,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -582,7 +604,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
ReferenceLJCoulomb14 nonbonded14; ReferenceLJCoulomb14 nonbonded14;
refBondForce.calculateForce(num14, bonded14IndexArray, posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14); refBondForce.calculateForce(num14, bonded14IndexArray, posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic) if (data.isPeriodic)
energy += dispersionCoefficient/(boxSize[0]*boxSize[1]*boxSize[2]); energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
} }
return energy; return energy;
} }
...@@ -736,19 +758,18 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C ...@@ -736,19 +758,18 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context); vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<RealVec>& forceData = extractForces(context);
RealVec& box = extractBoxSize(context); RealVec* boxVectors = extractBoxVectors(context);
float floatBoxSize[3] = {(float) box[0], (float) box[1], (float) box[2]};
double energy = 0; double energy = 0;
bool periodic = (nonbondedMethod == CutoffPeriodic); bool periodic = (nonbondedMethod == CutoffPeriodic);
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
neighborList->computeNeighborList(numParticles, data.posq, exclusions, floatBoxSize, data.isPeriodic, nonbondedCutoff, data.threads); neighborList->computeNeighborList(numParticles, data.posq, exclusions, boxVectors, data.isPeriodic, nonbondedCutoff, data.threads);
nonbonded->setUseCutoff(nonbondedCutoff, *neighborList); nonbonded->setUseCutoff(nonbondedCutoff, *neighborList);
} }
if (periodic) { if (periodic) {
double minAllowedSize = 2*nonbondedCutoff; double minAllowedSize = 2*nonbondedCutoff;
if (box[0] < minAllowedSize || box[1] < minAllowedSize || box[2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
nonbonded->setPeriodic(box); nonbonded->setPeriodic(boxVectors);
} }
bool globalParamsChanged = false; bool globalParamsChanged = false;
for (int i = 0; i < (int) globalParameterNames.size(); i++) { for (int i = 0; i < (int) globalParameterNames.size(); i++) {
...@@ -767,7 +788,7 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc ...@@ -767,7 +788,7 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner()); longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner());
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
} }
energy += longRangeCoefficient/(box[0]*box[1]*box[2]); energy += longRangeCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
return energy; return energy;
} }
...@@ -975,13 +996,12 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB ...@@ -975,13 +996,12 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& forceData = extractForces(context); vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0; RealOpenMM energy = 0;
RealVec& box = extractBoxSize(context); RealVec* boxVectors = extractBoxVectors(context);
float floatBoxSize[3] = {(float) box[0], (float) box[1], (float) box[2]};
if (data.isPeriodic) if (data.isPeriodic)
ixn->setPeriodic(extractBoxSize(context)); ixn->setPeriodic(extractBoxSize(context));
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
vector<set<int> > noExclusions(numParticles); vector<set<int> > noExclusions(numParticles);
neighborList->computeNeighborList(numParticles, data.posq, exclusions, floatBoxSize, data.isPeriodic, nonbondedCutoff, data.threads); neighborList->computeNeighborList(numParticles, data.posq, exclusions, boxVectors, data.isPeriodic, nonbondedCutoff, data.threads);
ixn->setUseCutoff(nonbondedCutoff, *neighborList); ixn->setUseCutoff(nonbondedCutoff, *neighborList);
} }
map<string, double> globalParameters; map<string, double> globalParameters;
...@@ -1038,6 +1058,7 @@ void CpuCalcCustomManyParticleForceKernel::initialize(const System& system, cons ...@@ -1038,6 +1058,7 @@ void CpuCalcCustomManyParticleForceKernel::initialize(const System& system, cons
ixn = new CpuCustomManyParticleForce(force, data.threads); ixn = new CpuCustomManyParticleForce(force, data.threads);
nonbondedMethod = CalcCustomManyParticleForceKernel::NonbondedMethod(force.getNonbondedMethod()); nonbondedMethod = CalcCustomManyParticleForceKernel::NonbondedMethod(force.getNonbondedMethod());
cutoffDistance = force.getCutoffDistance(); cutoffDistance = force.getCutoffDistance();
data.isPeriodic = (nonbondedMethod == CutoffPeriodic);
} }
double CpuCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
...@@ -1045,11 +1066,11 @@ double CpuCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -1045,11 +1066,11 @@ double CpuCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
for (int i = 0; i < (int) globalParameterNames.size(); i++) for (int i = 0; i < (int) globalParameterNames.size(); i++)
globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]); globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]);
if (nonbondedMethod == CutoffPeriodic) { if (nonbondedMethod == CutoffPeriodic) {
RealVec& box = extractBoxSize(context); RealVec* boxVectors = extractBoxVectors(context);
double minAllowedSize = 2*cutoffDistance; double minAllowedSize = 2*cutoffDistance;
if (box[0] < minAllowedSize || box[1] < minAllowedSize || box[2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
ixn->setPeriodic(box); ixn->setPeriodic(boxVectors);
} }
double energy = 0; double energy = 0;
ixn->calculateIxn(data.posq, particleParamArray, globalParameters, data.threadForce, includeForces, includeEnergy, energy); ixn->calculateIxn(data.posq, particleParamArray, globalParameters, data.threadForce, includeForces, includeEnergy, energy);
......
This diff is collapsed.
...@@ -103,20 +103,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -103,20 +103,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
also been set, and the smallest side of the periodic box is at least twice the cutoff also been set, and the smallest side of the periodic box is at least twice the cutoff
distance. distance.
@param boxSize the X, Y, and Z widths of the periodic box @param periodicBoxVectors the vectors defining the periodic box
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(float* periodicBoxSize) { void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
assert(cutoff); assert(cutoff);
assert(periodicBoxSize[0] >= 2*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
assert(periodicBoxSize[1] >= 2*cutoffDistance); assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
assert(periodicBoxSize[2] >= 2*cutoffDistance); assert(periodicBoxVectors[2][2] >= 2.0*cutoffDistance);
periodic = true; periodic = true;
this->periodicBoxSize[0] = periodicBoxSize[0]; this->periodicBoxVectors[0] = periodicBoxVectors[0];
this->periodicBoxSize[1] = periodicBoxSize[1]; this->periodicBoxVectors[1] = periodicBoxVectors[1];
this->periodicBoxSize[2] = periodicBoxSize[2]; this->periodicBoxVectors[2] = periodicBoxVectors[2];
recipBoxSize[0] = (float) (1.0/periodicBoxVectors[0][0]);
recipBoxSize[1] = (float) (1.0/periodicBoxVectors[1][1]);
recipBoxSize[2] = (float) (1.0/periodicBoxVectors[2][2]);
periodicBoxVec4.resize(3);
periodicBoxVec4[0] = fvec4(periodicBoxVectors[0][0], periodicBoxVectors[0][1], periodicBoxVectors[0][2], 0);
periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -186,18 +196,16 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -186,18 +196,16 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
int kmax = (ewald ? max(numRx, max(numRy,numRz)) : 0); int kmax = (ewald ? max(numRx, max(numRy,numRz)) : 0);
float factorEwald = -1 / (4*alphaEwald*alphaEwald); float factorEwald = -1 / (4*alphaEwald*alphaEwald);
float TWO_PI = 2.0 * PI_M; float TWO_PI = 2.0 * PI_M;
float recipCoeff = (float)(ONE_4PI_EPS0*4*PI_M/(periodicBoxSize[0] * periodicBoxSize[1] * periodicBoxSize[2]) /epsilon); float recipCoeff = (float)(ONE_4PI_EPS0*4*PI_M/(periodicBoxVectors[0][0] * periodicBoxVectors[1][1] * periodicBoxVectors[2][2]) /epsilon);
if (pme) { if (pme) {
pme_t pmedata; pme_t pmedata;
RealOpenMM virial[3][3];
pme_init(&pmedata, alphaEwald, numberOfAtoms, meshDim, 5, 1); pme_init(&pmedata, alphaEwald, numberOfAtoms, meshDim, 5, 1);
vector<RealOpenMM> charges(numberOfAtoms); vector<RealOpenMM> charges(numberOfAtoms);
for (int i = 0; i < numberOfAtoms; i++) for (int i = 0; i < numberOfAtoms; i++)
charges[i] = posq[4*i+3]; charges[i] = posq[4*i+3];
RealOpenMM boxSize[3] = {periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2]};
RealOpenMM recipEnergy = 0.0; RealOpenMM recipEnergy = 0.0;
pme_exec(pmedata, atomCoordinates, forces, charges, boxSize, &recipEnergy, virial); pme_exec(pmedata, atomCoordinates, forces, charges, periodicBoxVectors, &recipEnergy);
if (totalEnergy) if (totalEnergy)
*totalEnergy += recipEnergy; *totalEnergy += recipEnergy;
pme_destroy(pmedata); pme_destroy(pmedata);
...@@ -209,7 +217,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -209,7 +217,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup reciprocal box // setup reciprocal box
float recipBoxSize[3] = { TWO_PI / periodicBoxSize[0], TWO_PI / periodicBoxSize[1], TWO_PI / periodicBoxSize[2]}; float recipBoxSize[3] = { TWO_PI / periodicBoxVectors[0][0], TWO_PI / periodicBoxVectors[1][1], TWO_PI / periodicBoxVectors[2][2]};
// setup K-vectors // setup K-vectors
...@@ -330,8 +338,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -330,8 +338,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
threadEnergy[threadIndex] = 0; threadEnergy[threadIndex] = 0;
double* energyPtr = (includeEnergy ? &threadEnergy[threadIndex] : NULL); double* energyPtr = (includeEnergy ? &threadEnergy[threadIndex] : NULL);
float* forces = &(*threadForce)[threadIndex][0]; float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) { if (ewald || pme) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
...@@ -344,8 +352,6 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -344,8 +352,6 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum. // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
for (int i = threadIndex; i < numberOfAtoms; i += numThreads) { for (int i = threadIndex; i < numberOfAtoms; i += numThreads) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f); fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) { for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
...@@ -454,9 +460,16 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -454,9 +460,16 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI; deltaR = posJ-posI;
if (periodic) { if (periodic) {
if (triclinic) {
deltaR -= periodicBoxVec4[2]*floorf(deltaR[2]*recipBoxSize[2]+0.5f);
deltaR -= periodicBoxVec4[1]*floorf(deltaR[1]*recipBoxSize[1]+0.5f);
deltaR -= periodicBoxVec4[0]*floorf(deltaR[0]*recipBoxSize[0]+0.5f);
}
else {
fvec4 base = round(deltaR*invBoxSize)*boxSize; fvec4 base = round(deltaR*invBoxSize)*boxSize;
deltaR = deltaR-base; deltaR = deltaR-base;
} }
}
r2 = dot3(deltaR, deltaR); r2 = dot3(deltaR, deltaR);
} }
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios. /* Portions copyright (c) 2006-2014 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -46,6 +46,14 @@ CpuNonbondedForceVec4::CpuNonbondedForceVec4() { ...@@ -46,6 +46,14 @@ CpuNonbondedForceVec4::CpuNonbondedForceVec4() {
} }
void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else
calculateBlockIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
...@@ -75,7 +83,7 @@ void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, dou ...@@ -75,7 +83,7 @@ void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, dou
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec4 dx, dy, dz, r2; fvec4 dx, dy, dz, r2;
getDeltaR(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); getDeltaR<TRICLINIC>(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec4 include; ivec4 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -155,6 +163,14 @@ void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, dou ...@@ -155,6 +163,14 @@ void CpuNonbondedForceVec4::calculateBlockIxn(int blockIndex, float* forces, dou
} }
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockEwaldIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else
calculateBlockEwaldIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
...@@ -184,7 +200,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces ...@@ -184,7 +200,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec4 dx, dy, dz, r2; fvec4 dx, dy, dz, r2;
getDeltaR(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); getDeltaR<TRICLINIC>(posq+4*atom, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec4 include; ivec4 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -257,15 +273,29 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces ...@@ -257,15 +273,29 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]); (fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
} }
template <bool TRICLINIC>
void CpuNonbondedForceVec4::getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForceVec4::getDeltaR(const float* posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0]; dx = x-posI[0];
dy = y-posI[1]; dy = y-posI[1];
dz = z-posI[2]; dz = z-posI[2];
if (periodic) { if (periodic) {
if (TRICLINIC) {
fvec4 scale3 = floor(dz*recipBoxSize[2]+0.5f);
dx -= scale3*periodicBoxVectors[2][0];
dy -= scale3*periodicBoxVectors[2][1];
dz -= scale3*periodicBoxVectors[2][2];
fvec4 scale2 = floor(dy*recipBoxSize[1]+0.5f);
dx -= scale2*periodicBoxVectors[1][0];
dy -= scale2*periodicBoxVectors[1][1];
fvec4 scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
}
else {
dx -= round(dx*invBoxSize[0])*boxSize[0]; dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1]; dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2]; dz -= round(dz*invBoxSize[2])*boxSize[2];
} }
}
r2 = dx*dx + dy*dy + dz*dz; r2 = dx*dx + dy*dy + dz*dz;
} }
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios. /* Portions copyright (c) 2006-2014 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -78,6 +78,14 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() { ...@@ -78,6 +78,14 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
} }
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else
calculateBlockIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
...@@ -106,7 +114,7 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou ...@@ -106,7 +114,7 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec8 dx, dy, dz, r2; fvec8 dx, dy, dz, r2;
getDeltaR(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); getDeltaR<TRICLINIC>(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec8 include; ivec8 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -186,6 +194,14 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou ...@@ -186,6 +194,14 @@ void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, dou
} }
void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
if (triclinic)
calculateBlockEwaldIxnImpl<true>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
else
calculateBlockEwaldIxnImpl<false>(blockIndex, forces, totalEnergy, boxSize, invBoxSize);
}
template <bool TRICLINIC>
void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[8*blockIndex];
...@@ -214,7 +230,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces ...@@ -214,7 +230,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
fvec8 dx, dy, dz, r2; fvec8 dx, dy, dz, r2;
getDeltaR(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize); getDeltaR<TRICLINIC>(&posq[4*atom], blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
ivec8 include; ivec8 include;
char excl = exclusions[i]; char excl = exclusions[i];
if (excl == 0) if (excl == 0)
...@@ -287,15 +303,29 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces ...@@ -287,15 +303,29 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxn(int blockIndex, float* forces
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]); (fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
} }
template <bool TRICLINIC>
void CpuNonbondedForceVec8::getDeltaR(const float* posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForceVec8::getDeltaR(const float* posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0]; dx = x-posI[0];
dy = y-posI[1]; dy = y-posI[1];
dz = z-posI[2]; dz = z-posI[2];
if (periodic) { if (periodic) {
if (TRICLINIC) {
fvec8 scale3 = floor(dz*recipBoxSize[2]+0.5f);
dx -= scale3*periodicBoxVectors[2][0];
dy -= scale3*periodicBoxVectors[2][1];
dz -= scale3*periodicBoxVectors[2][2];
fvec8 scale2 = floor(dy*recipBoxSize[1]+0.5f);
dx -= scale2*periodicBoxVectors[1][0];
dy -= scale2*periodicBoxVectors[1][1];
fvec8 scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
}
else {
dx -= round(dx*invBoxSize[0])*boxSize[0]; dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1]; dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2]; dz -= round(dz*invBoxSize[2])*boxSize[2];
} }
}
r2 = dx*dx + dy*dy + dz*dz; r2 = dx*dx + dy*dy + dz*dz;
} }
......
...@@ -53,7 +53,17 @@ using namespace std; ...@@ -53,7 +53,17 @@ using namespace std;
const double TOL = 1e-5; const double TOL = 1e-5;
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize) { Vec3 computeDelta(const Vec3& pos1, const Vec3& pos2, bool periodic, const Vec3* periodicBoxVectors) {
Vec3 diff = pos1-pos2;
if (periodic) {
diff -= periodicBoxVectors[2]*floor(diff[2]/periodicBoxVectors[2][2]+0.5);
diff -= periodicBoxVectors[1]*floor(diff[1]/periodicBoxVectors[1][1]+0.5);
diff -= periodicBoxVectors[0]*floor(diff[0]/periodicBoxVectors[0][0]+0.5);
}
return diff;
}
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize, bool triclinic) {
// Create a System and Context. // Create a System and Context.
int numParticles = force->getNumParticles(); int numParticles = force->getNumParticles();
...@@ -61,7 +71,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p ...@@ -61,7 +71,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
System system; System system;
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
system.addParticle(1.0); system.addParticle(1.0);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize)); Vec3 boxVectors[3];
if (triclinic) {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0.2*boxSize, boxSize, 0);
boxVectors[2] = Vec3(-0.3*boxSize, -0.1*boxSize, boxSize);
}
else {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0, boxSize, 0);
boxVectors[2] = Vec3(0, 0, boxSize);
}
system.setDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
system.addForce(force); system.addForce(force);
VerletIntegrator integrator(0.001); VerletIntegrator integrator(0.001);
CpuPlatform platform; CpuPlatform platform;
...@@ -73,20 +94,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p ...@@ -73,20 +94,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
// See if the energy matches the expected value. // See if the energy matches the expected value.
double expectedEnergy = 0; double expectedEnergy = 0;
bool periodic = (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic);
for (int i = 0; i < (int) expectedSets.size(); i++) { for (int i = 0; i < (int) expectedSets.size(); i++) {
int p1 = expectedSets[i][0]; int p1 = expectedSets[i][0];
int p2 = expectedSets[i][1]; int p2 = expectedSets[i][1];
int p3 = expectedSets[i][2]; int p3 = expectedSets[i][2];
Vec3 d12 = positions[p2]-positions[p1]; Vec3 d12 = computeDelta(positions[p2], positions[p1], periodic, boxVectors);
Vec3 d13 = positions[p3]-positions[p1]; Vec3 d13 = computeDelta(positions[p3], positions[p1], periodic, boxVectors);
Vec3 d23 = positions[p3]-positions[p2]; Vec3 d23 = computeDelta(positions[p3], positions[p2], periodic, boxVectors);
if (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic) {
for (int j = 0; j < 3; j++) {
d12[j] -= floor(d12[j]/boxSize+0.5f)*boxSize;
d13[j] -= floor(d13[j]/boxSize+0.5f)*boxSize;
d23[j] -= floor(d23[j]/boxSize+0.5f)*boxSize;
}
}
double r12 = sqrt(d12.dot(d12)); double r12 = sqrt(d12.dot(d12));
double r13 = sqrt(d13.dot(d13)); double r13 = sqrt(d13.dot(d13));
double r23 = sqrt(d23.dot(d23)); double r23 = sqrt(d23.dot(d23));
...@@ -210,7 +225,7 @@ void testNoCutoff() { ...@@ -210,7 +225,7 @@ void testNoCutoff() {
positions.push_back(Vec3(0.4, 0, -0.8)); positions.push_back(Vec3(0.4, 0, -0.8));
int sets[4][3] = {{0,1,2}, {1,2,3}, {2,3,0}, {3,0,1}}; int sets[4][3] = {{0,1,2}, {1,2,3}, {2,3,0}, {3,0,1}};
vector<const int*> expectedSets(&sets[0], &sets[4]); vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, 2.0); validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
} }
void testCutoff() { void testCutoff() {
...@@ -235,7 +250,7 @@ void testCutoff() { ...@@ -235,7 +250,7 @@ void testCutoff() {
positions.push_back(Vec3(0.2, 0.5, -0.1)); positions.push_back(Vec3(0.2, 0.5, -0.1));
int sets[7][3] = {{0,1,2}, {0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,2,4}, {1,3,4}}; int sets[7][3] = {{0,1,2}, {0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,2,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[7]); vector<const int*> expectedSets(&sets[0], &sets[7]);
validateAxilrodTeller(force, positions, expectedSets, 2.0); validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
} }
void testPeriodic() { void testPeriodic() {
...@@ -261,7 +276,33 @@ void testPeriodic() { ...@@ -261,7 +276,33 @@ void testPeriodic() {
double boxSize = 2.1; double boxSize = 2.1;
int sets[5][3] = {{0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,3,4}}; int sets[5][3] = {{0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]); vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, boxSize); validateAxilrodTeller(force, positions, expectedSets, boxSize, false);
}
void testTriclinic() {
CustomManyParticleForce* force = new CustomManyParticleForce(3,
"C*(1+3*cos(theta1)*cos(theta2)*cos(theta3))/(r12*r13*r23)^3;"
"theta1=angle(p1,p2,p3); theta2=angle(p2,p3,p1); theta3=angle(p3,p1,p2);"
"r12=distance(p1,p2); r13=distance(p1,p3); r23=distance(p2,p3)");
force->addGlobalParameter("C", 1.5);
force->setNonbondedMethod(CustomManyParticleForce::CutoffPeriodic);
force->setCutoffDistance(1.05);
vector<double> params;
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
vector<Vec3> positions;
positions.push_back(Vec3(0, 0, 0));
positions.push_back(Vec3(1, 0, 0));
positions.push_back(Vec3(0, 1.1, 0.3));
positions.push_back(Vec3(0.4, 0, -0.8));
positions.push_back(Vec3(0.2, 0.5, -0.1));
double boxSize = 2.1;
int sets[4][3] = {{0,1,3}, {0,1,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, boxSize, true);
} }
void testExclusions() { void testExclusions() {
...@@ -286,7 +327,7 @@ void testExclusions() { ...@@ -286,7 +327,7 @@ void testExclusions() {
force->addExclusion(0, 3); force->addExclusion(0, 3);
int sets[5][3] = {{0,1,4}, {1,2,3}, {1,2,4}, {1,3,4}, {2,3,4}}; int sets[5][3] = {{0,1,4}, {1,2,3}, {1,2,4}, {1,3,4}, {2,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]); vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, 2.0); validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
} }
void testAllTerms() { void testAllTerms() {
...@@ -680,6 +721,7 @@ int main() { ...@@ -680,6 +721,7 @@ int main() {
testNoCutoff(); testNoCutoff();
testCutoff(); testCutoff();
testPeriodic(); testPeriodic();
testTriclinic();
testExclusions(); testExclusions();
testAllTerms(); testAllTerms();
testParameters(); testParameters();
......
...@@ -224,6 +224,65 @@ void testPeriodic() { ...@@ -224,6 +224,65 @@ void testPeriodic() {
ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL); ASSERT_EQUAL_TOL(1.9+1+0.9, state.getPotentialEnergy(), TOL);
} }
void testTriclinic() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
Vec3 a(3.1, 0, 0);
Vec3 b(0.4, 3.5, 0);
Vec3 c(-0.1, -0.5, 4.0);
system.setDefaultPeriodicBoxVectors(a, b, c);
VerletIntegrator integrator(0.01);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("r");
nonbonded->addParticle(vector<double>());
nonbonded->addParticle(vector<double>());
nonbonded->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
const double cutoff = 1.5;
nonbonded->setCutoffDistance(cutoff);
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int iteration = 0; iteration < 50; iteration++) {
// Generate random positions for the two particles.
positions[0] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
positions[1] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
context.setPositions(positions);
// Loop over all possible periodic copies and find the nearest one.
Vec3 delta;
double distance2 = 100.0;
for (int i = -1; i < 2; i++)
for (int j = -1; j < 2; j++)
for (int k = -1; k < 2; k++) {
Vec3 d = positions[1]-positions[0]+a*i+b*j+c*k;
if (d.dot(d) < distance2) {
delta = d;
distance2 = d.dot(d);
}
}
double distance = sqrt(distance2);
// See if the force and energy are correct.
State state = context.getState(State::Forces | State::Energy);
if (distance >= cutoff) {
ASSERT_EQUAL(0.0, state.getPotentialEnergy());
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[0], 0);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[1], 0);
}
else {
const Vec3 force = delta/sqrt(delta.dot(delta));
ASSERT_EQUAL_TOL(distance, state.getPotentialEnergy(), TOL);
ASSERT_EQUAL_VEC(force, state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(-force, state.getForces()[1], TOL);
}
}
}
void testContinuous1DFunction() { void testContinuous1DFunction() {
System system; System system;
system.addParticle(1.0); system.addParticle(1.0);
...@@ -852,6 +911,7 @@ int main() { ...@@ -852,6 +911,7 @@ int main() {
testExclusions(); testExclusions();
testCutoff(); testCutoff();
testPeriodic(); testPeriodic();
testTriclinic();
testContinuous1DFunction(); testContinuous1DFunction();
testContinuous2DFunction(); testContinuous2DFunction();
testContinuous3DFunction(); testContinuous3DFunction();
......
...@@ -201,6 +201,57 @@ void testEwald2Ions() { ...@@ -201,6 +201,57 @@ void testEwald2Ions() {
ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 0.01/*10*TOL*/); ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 0.01/*10*TOL*/);
} }
void testTriclinic() {
// Create a triclinic box containing eight particles.
System system;
system.setDefaultPeriodicBoxVectors(Vec3(2.5, 0, 0), Vec3(0.5, 3.0, 0), Vec3(0.7, 0.9, 3.5));
for (int i = 0; i < 8; i++)
system.addParticle(1.0);
NonbondedForce* force = new NonbondedForce();
system.addForce(force);
force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(1.0);
force->setPMEParameters(3.45891, 32, 40, 48);
for (int i = 0; i < 4; i++)
force->addParticle(-1, 0.440104, 0.4184); // Cl parameters
for (int i = 0; i < 4; i++)
force->addParticle(1, 0.332840, 0.0115897); // Na parameters
vector<Vec3> positions(8);
positions[0] = Vec3(1.744, 2.788, 3.162);
positions[1] = Vec3(1.048, 0.762, 2.340);
positions[2] = Vec3(2.489, 1.570, 2.817);
positions[3] = Vec3(1.027, 1.893, 3.271);
positions[4] = Vec3(0.937, 0.825, 0.009);
positions[5] = Vec3(2.290, 1.887, 3.352);
positions[6] = Vec3(1.266, 1.111, 2.894);
positions[7] = Vec3(0.933, 1.862, 3.490);
// Compute the forces and energy.
VerletIntegrator integ(0.001);
Context context(system, integ, platform);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
// Compare them to values computed by Gromacs.
double expectedEnergy = -963.370;
vector<Vec3> expectedForce(8);
expectedForce[0] = Vec3(4.25253e+01, -1.23503e+02, 1.22139e+02);
expectedForce[1] = Vec3(9.74752e+01, 1.68213e+02, 1.93169e+02);
expectedForce[2] = Vec3(-1.50348e+02, 1.29165e+02, 3.70435e+02);
expectedForce[3] = Vec3(9.18644e+02, -3.52571e+00, -1.34772e+03);
expectedForce[4] = Vec3(-1.61193e+02, 9.01528e+01, -7.12904e+01);
expectedForce[5] = Vec3(2.82630e+02, 2.78029e+01, -3.72864e+02);
expectedForce[6] = Vec3(-1.47454e+02, -2.14448e+02, -3.55789e+02);
expectedForce[7] = Vec3(-8.82195e+02, -7.39132e+01, 1.46202e+03);
for (int i = 0; i < 8; i++) {
ASSERT_EQUAL_VEC(expectedForce[i], state.getForces()[i], 1e-4);
}
ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4);
}
void testErrorTolerance(NonbondedForce::NonbondedMethod method) { void testErrorTolerance(NonbondedForce::NonbondedMethod method) {
// Create a cloud of random point charges. // Create a cloud of random point charges.
...@@ -261,6 +312,7 @@ int main(int argc, char* argv[]) { ...@@ -261,6 +312,7 @@ int main(int argc, char* argv[]) {
testEwaldPME(false); testEwaldPME(false);
testEwaldPME(true); testEwaldPME(true);
// testEwald2Ions(); // testEwald2Ions();
testTriclinic();
testErrorTolerance(NonbondedForce::Ewald); testErrorTolerance(NonbondedForce::Ewald);
testErrorTolerance(NonbondedForce::PME); testErrorTolerance(NonbondedForce::PME);
} }
......
...@@ -48,10 +48,21 @@ ...@@ -48,10 +48,21 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
void testNeighborList(bool periodic) { void testNeighborList(bool periodic, bool triclinic) {
const int numParticles = 500; const int numParticles = 500;
const float cutoff = 2.0f; const float cutoff = 2.0f;
const float boxSize[3] = {20.0f, 15.0f, 22.0f}; RealVec boxVectors[3];
if (triclinic) {
boxVectors[0] = RealVec(20, 0, 0);
boxVectors[1] = RealVec(5, 15, 0);
boxVectors[2] = RealVec(-3, -7, 22);
}
else {
boxVectors[0] = RealVec(20, 0, 0);
boxVectors[1] = RealVec(0, 15, 0);
boxVectors[2] = RealVec(0, 0, 22);
}
const float boxSize[3] = {boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]};
const int blockSize = 8; const int blockSize = 8;
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
...@@ -69,7 +80,7 @@ void testNeighborList(bool periodic) { ...@@ -69,7 +80,7 @@ void testNeighborList(bool periodic) {
} }
ThreadPool threads; ThreadPool threads;
CpuNeighborList neighborList(blockSize); CpuNeighborList neighborList(blockSize);
neighborList.computeNeighborList(numParticles, positions, exclusions, boxSize, periodic, cutoff, threads); neighborList.computeNeighborList(numParticles, positions, exclusions, boxVectors, periodic, cutoff, threads);
// Convert the neighbor list to a set for faster lookup. // Convert the neighbor list to a set for faster lookup.
...@@ -94,15 +105,13 @@ void testNeighborList(bool periodic) { ...@@ -94,15 +105,13 @@ void testNeighborList(bool periodic) {
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
for (int j = 0; j <= i; j++) { for (int j = 0; j <= i; j++) {
bool shouldInclude = (exclusions[i].find(j) == exclusions[i].end()); bool shouldInclude = (exclusions[i].find(j) == exclusions[i].end());
float dx = positions[4*i]-positions[4*j]; Vec3 diff(positions[4*i]-positions[4*j], positions[4*i+1]-positions[4*j+1], positions[4*i+2]-positions[4*j+2]);
float dy = positions[4*i+1]-positions[4*j+1];
float dz = positions[4*i+2]-positions[4*j+2];
if (periodic) { if (periodic) {
dx -= floor(dx/boxSize[0]+0.5f)*boxSize[0]; diff -= boxVectors[2]*floor(diff[2]/boxSize[2]+0.5);
dy -= floor(dy/boxSize[1]+0.5f)*boxSize[1]; diff -= boxVectors[1]*floor(diff[1]/boxSize[1]+0.5);
dz -= floor(dz/boxSize[2]+0.5f)*boxSize[2]; diff -= boxVectors[0]*floor(diff[0]/boxSize[0]+0.5);
} }
if (dx*dx + dy*dy + dz*dz > cutoff*cutoff) if (diff.dot(diff) > cutoff*cutoff)
shouldInclude = false; shouldInclude = false;
bool isIncluded = (neighbors.find(make_pair(i, j)) != neighbors.end() || neighbors.find(make_pair(j, i)) != neighbors.end()); bool isIncluded = (neighbors.find(make_pair(i, j)) != neighbors.end() || neighbors.find(make_pair(j, i)) != neighbors.end());
if (shouldInclude) if (shouldInclude)
...@@ -116,8 +125,9 @@ int main() { ...@@ -116,8 +125,9 @@ int main() {
cout << "CPU is not supported. Exiting." << endl; cout << "CPU is not supported. Exiting." << endl;
return 0; return 0;
} }
testNeighborList(false); testNeighborList(false, false);
testNeighborList(true); testNeighborList(true, false);
testNeighborList(true, true);
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -353,6 +353,67 @@ void testPeriodic() { ...@@ -353,6 +353,67 @@ void testPeriodic() {
ASSERT_EQUAL_TOL(2*ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL); ASSERT_EQUAL_TOL(2*ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL);
} }
void testTriclinic() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
Vec3 a(3.1, 0, 0);
Vec3 b(0.4, 3.5, 0);
Vec3 c(-0.1, -0.5, 4.0);
system.setDefaultPeriodicBoxVectors(a, b, c);
VerletIntegrator integrator(0.01);
NonbondedForce* nonbonded = new NonbondedForce();
nonbonded->addParticle(1.0, 1, 0);
nonbonded->addParticle(1.0, 1, 0);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
const double cutoff = 1.5;
nonbonded->setCutoffDistance(cutoff);
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
const double eps = 78.3;
const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
for (int iteration = 0; iteration < 50; iteration++) {
// Generate random positions for the two particles.
positions[0] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
positions[1] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
context.setPositions(positions);
// Loop over all possible periodic copies and find the nearest one.
Vec3 delta;
double distance2 = 100.0;
for (int i = -1; i < 2; i++)
for (int j = -1; j < 2; j++)
for (int k = -1; k < 2; k++) {
Vec3 d = positions[1]-positions[0]+a*i+b*j+c*k;
if (d.dot(d) < distance2) {
delta = d;
distance2 = d.dot(d);
}
}
double distance = sqrt(distance2);
// See if the force and energy are correct.
State state = context.getState(State::Forces | State::Energy);
if (distance >= cutoff) {
ASSERT_EQUAL(0.0, state.getPotentialEnergy());
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[0], 0);
ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[1], 0);
}
else {
const Vec3 force = delta*ONE_4PI_EPS0*(-1.0/(distance*distance*distance)+2.0*krf);
ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(1.0/distance+krf*distance*distance-crf), state.getPotentialEnergy(), 1e-4);
ASSERT_EQUAL_VEC(force, state.getForces()[0], 2e-5);
ASSERT_EQUAL_VEC(-force, state.getForces()[1], 2e-5);
}
}
}
void testLargeSystem() { void testLargeSystem() {
const int numMolecules = 600; const int numMolecules = 600;
...@@ -635,6 +696,7 @@ int main(int argc, char* argv[]) { ...@@ -635,6 +696,7 @@ int main(int argc, char* argv[]) {
testCutoff(); testCutoff();
testCutoff14(); testCutoff14();
testPeriodic(); testPeriodic();
testTriclinic();
testLargeSystem(); testLargeSystem();
testDispersionCorrection(); testDispersionCorrection();
testChangingParameters(); testChangingParameters();
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -352,43 +352,62 @@ public: ...@@ -352,43 +352,62 @@ public:
/** /**
* Get whether double precision is being used. * Get whether double precision is being used.
*/ */
bool getUseDoublePrecision() { bool getUseDoublePrecision() const {
return useDoublePrecision; return useDoublePrecision;
} }
/** /**
* Get whether mixed precision is being used. * Get whether mixed precision is being used.
*/ */
bool getUseMixedPrecision() { bool getUseMixedPrecision() const {
return useMixedPrecision; return useMixedPrecision;
} }
/**
* Get whether the periodic box is triclinic.
*/
bool getBoxIsTriclinic() const {
return boxIsTriclinic;
}
/** /**
* Convert a number to a string in a format suitable for including in a kernel. * Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision. * This takes into account whether the context uses single or double precision.
*/ */
std::string doubleToString(double value); std::string doubleToString(double value) const;
/** /**
* Convert a number to a string in a format suitable for including in a kernel. * Convert a number to a string in a format suitable for including in a kernel.
*/ */
std::string intToString(int value); std::string intToString(int value) const;
/** /**
* Convert a CUDA result code to the corresponding string description. * Convert a CUDA result code to the corresponding string description.
*/ */
static std::string getErrorString(CUresult result); static std::string getErrorString(CUresult result);
/** /**
* Get the size of the periodic box. * Get the vectors defining the periodic box.
*/ */
double4 getPeriodicBoxSize() const { void getPeriodicBoxVectors(Vec3& a, Vec3& b, Vec3& c) const {
return periodicBoxSize; a = Vec3(periodicBoxVecX.x, periodicBoxVecX.y, periodicBoxVecX.z);
b = Vec3(periodicBoxVecY.x, periodicBoxVecY.y, periodicBoxVecY.z);
c = Vec3(periodicBoxVecZ.x, periodicBoxVecZ.y, periodicBoxVecZ.z);
} }
/** /**
* Set the size of the periodic box. * Set the vectors defining the periodic box.
*/ */
void setPeriodicBoxSize(double xsize, double ysize, double zsize) { void setPeriodicBoxVectors(const Vec3& a, const Vec3& b, const Vec3& c) {
periodicBoxSize = make_double4(xsize, ysize, zsize, 0.0); periodicBoxVecX = make_double4(a[0], a[1], a[2], 0.0);
invPeriodicBoxSize = make_double4(1.0/xsize, 1.0/ysize, 1.0/zsize, 0.0); periodicBoxVecY = make_double4(b[0], b[1], b[2], 0.0);
periodicBoxSizeFloat = make_float4((float) xsize, (float) ysize, (float) zsize, 0.0f); periodicBoxVecZ = make_double4(c[0], c[1], c[2], 0.0);
invPeriodicBoxSizeFloat = make_float4(1.0f/(float) xsize, 1.0f/(float) ysize, 1.0f/(float) zsize, 0.0f); periodicBoxVecXFloat = make_float4((float) a[0], (float) a[1], (float) a[2], 0.0f);
periodicBoxVecYFloat = make_float4((float) b[0], (float) b[1], (float) b[2], 0.0f);
periodicBoxVecZFloat = make_float4((float) c[0], (float) c[1], (float) c[2], 0.0f);
periodicBoxSize = make_double4(a[0], b[1], c[2], 0.0);
invPeriodicBoxSize = make_double4(1.0/a[0], 1.0/b[1], 1.0/c[2], 0.0);
periodicBoxSizeFloat = make_float4((float) a[0], (float) b[1], (float) c[2], 0.0f);
invPeriodicBoxSizeFloat = make_float4(1.0f/(float) a[0], 1.0f/(float) b[1], 1.0f/(float) c[2], 0.0f);
}
/**
* Get the size of the periodic box.
*/
double4 getPeriodicBoxSize() const {
return periodicBoxSize;
} }
/** /**
* Get the inverse of the size of the periodic box. * Get the inverse of the size of the periodic box.
...@@ -410,6 +429,27 @@ public: ...@@ -410,6 +429,27 @@ public:
void* getInvPeriodicBoxSizePointer() { void* getInvPeriodicBoxSizePointer() {
return (useDoublePrecision ? reinterpret_cast<void*>(&invPeriodicBoxSize) : reinterpret_cast<void*>(&invPeriodicBoxSizeFloat)); return (useDoublePrecision ? reinterpret_cast<void*>(&invPeriodicBoxSize) : reinterpret_cast<void*>(&invPeriodicBoxSizeFloat));
} }
/**
* Get a pointer to the first periodic box vector, represented as either a float4 or double4 depending on
* this context's precision. This value is suitable for passing to kernels as an argument.
*/
void* getPeriodicBoxVecXPointer() {
return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecX) : reinterpret_cast<void*>(&periodicBoxVecXFloat));
}
/**
* Get a pointer to the second periodic box vector, represented as either a float4 or double4 depending on
* this context's precision. This value is suitable for passing to kernels as an argument.
*/
void* getPeriodicBoxVecYPointer() {
return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecY) : reinterpret_cast<void*>(&periodicBoxVecYFloat));
}
/**
* Get a pointer to the third periodic box vector, represented as either a float4 or double4 depending on
* this context's precision. This value is suitable for passing to kernels as an argument.
*/
void* getPeriodicBoxVecZPointer() {
return (useDoublePrecision ? reinterpret_cast<void*>(&periodicBoxVecZ) : reinterpret_cast<void*>(&periodicBoxVecZFloat));
}
/** /**
* Get the CudaIntegrationUtilities for this context. * Get the CudaIntegrationUtilities for this context.
*/ */
...@@ -525,10 +565,10 @@ private: ...@@ -525,10 +565,10 @@ private:
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic;
std::string compiler, tempDir, cacheDir, gpuArchitecture; std::string compiler, tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxSize, invPeriodicBoxSize; double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
std::string defaultOptimizationOptions; std::string defaultOptimizationOptions;
std::map<std::string, std::string> compilationDefines; std::map<std::string, std::string> compilationDefines;
CUcontext context; CUcontext context;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -233,6 +233,66 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -233,6 +233,66 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["ERF"] = useDoublePrecision ? "erf" : "erff"; compilationDefines["ERF"] = useDoublePrecision ? "erf" : "erff";
compilationDefines["ERFC"] = useDoublePrecision ? "erfc" : "erfcf"; compilationDefines["ERFC"] = useDoublePrecision ? "erfc" : "erfcf";
// Set defines for applying periodic boundary conditions.
Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
boxIsTriclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
if (boxIsTriclinic) {
compilationDefines["APPLY_PERIODIC_TO_DELTA(delta)"] =
"{"
"real scale3 = floor(delta.z*invPeriodicBoxSize.z+0.5f); \\\n"
"delta.x -= scale3*periodicBoxVecZ.x; \\\n"
"delta.y -= scale3*periodicBoxVecZ.y; \\\n"
"delta.z -= scale3*periodicBoxVecZ.z; \\\n"
"real scale2 = floor(delta.y*invPeriodicBoxSize.y+0.5f); \\\n"
"delta.x -= scale2*periodicBoxVecY.x; \\\n"
"delta.y -= scale2*periodicBoxVecY.y; \\\n"
"real scale1 = floor(delta.x*invPeriodicBoxSize.x+0.5f); \\\n"
"delta.x -= scale1*periodicBoxVecX.x;}";
compilationDefines["APPLY_PERIODIC_TO_POS(pos)"] =
"{"
"real scale3 = floor(pos.z*invPeriodicBoxSize.z); \\\n"
"pos.x -= scale3*periodicBoxVecZ.x; \\\n"
"pos.y -= scale3*periodicBoxVecZ.y; \\\n"
"pos.z -= scale3*periodicBoxVecZ.z; \\\n"
"real scale2 = floor(pos.y*invPeriodicBoxSize.y); \\\n"
"pos.x -= scale2*periodicBoxVecY.x; \\\n"
"pos.y -= scale2*periodicBoxVecY.y; \\\n"
"real scale1 = floor(pos.x*invPeriodicBoxSize.x); \\\n"
"pos.x -= scale1*periodicBoxVecX.x;}";
compilationDefines["APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"] =
"{"
"real scale3 = floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f); \\\n"
"pos.x -= scale3*periodicBoxVecZ.x; \\\n"
"pos.y -= scale3*periodicBoxVecZ.y; \\\n"
"pos.z -= scale3*periodicBoxVecZ.z; \\\n"
"real scale2 = floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f); \\\n"
"pos.x -= scale2*periodicBoxVecY.x; \\\n"
"pos.y -= scale2*periodicBoxVecY.y; \\\n"
"real scale1 = floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f); \\\n"
"pos.x -= scale1*periodicBoxVecX.x;}";
}
else {
compilationDefines["APPLY_PERIODIC_TO_DELTA(delta)"] =
"{"
"delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; \\\n"
"delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; \\\n"
"delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}";
compilationDefines["APPLY_PERIODIC_TO_POS(pos)"] =
"{"
"pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; \\\n"
"pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y; \\\n"
"pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;}";
compilationDefines["APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"] =
"{"
"pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; \\\n"
"pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; \\\n"
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}";
}
// Create the work thread used for parallelization when running on multiple devices. // Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread(); thread = new WorkThread();
...@@ -534,7 +594,7 @@ void CudaContext::restoreDefaultStream() { ...@@ -534,7 +594,7 @@ void CudaContext::restoreDefaultStream() {
setCurrentStream(0); setCurrentStream(0);
} }
string CudaContext::doubleToString(double value) { string CudaContext::doubleToString(double value) const {
stringstream s; stringstream s;
s.precision(useDoublePrecision ? 16 : 8); s.precision(useDoublePrecision ? 16 : 8);
s << scientific << value; s << scientific << value;
...@@ -543,7 +603,7 @@ string CudaContext::doubleToString(double value) { ...@@ -543,7 +603,7 @@ string CudaContext::doubleToString(double value) {
return s.str(); return s.str();
} }
string CudaContext::intToString(int value) { string CudaContext::intToString(int value) const {
stringstream s; stringstream s;
s << value; s << value;
return s.str(); return s.str();
...@@ -1078,16 +1138,21 @@ void CudaContext::reorderAtomsImpl() { ...@@ -1078,16 +1138,21 @@ void CudaContext::reorderAtomsImpl() {
// Move each molecule position into the same box. // Move each molecule position into the same box.
for (int i = 0; i < numMolecules; i++) { for (int i = 0; i < numMolecules; i++) {
int xcell = (int) floor(molPos[i].x*invPeriodicBoxSize.x); Real4 center = molPos[i];
int ycell = (int) floor(molPos[i].y*invPeriodicBoxSize.y); int zcell = (int) floor(center.z*invPeriodicBoxSize.z);
int zcell = (int) floor(molPos[i].z*invPeriodicBoxSize.z); center.x -= zcell*periodicBoxVecZ.x;
Real dx = xcell*periodicBoxSize.x; center.y -= zcell*periodicBoxVecZ.y;
Real dy = ycell*periodicBoxSize.y; center.z -= zcell*periodicBoxVecZ.z;
Real dz = zcell*periodicBoxSize.z; int ycell = (int) floor(center.y*invPeriodicBoxSize.y);
if (dx != 0.0f || dy != 0.0f || dz != 0.0f) { center.x -= ycell*periodicBoxVecY.x;
molPos[i].x -= dx; center.y -= ycell*periodicBoxVecY.y;
molPos[i].y -= dy; int xcell = (int) floor(center.x*invPeriodicBoxSize.x);
molPos[i].z -= dz; center.x -= xcell*periodicBoxVecX.x;
if (xcell != 0 || ycell != 0 || zcell != 0) {
Real dx = molPos[i].x-center.x;
Real dy = molPos[i].y-center.y;
Real dz = molPos[i].z-center.z;
molPos[i] = center;
for (int j = 0; j < (int) atoms.size(); j++) { for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i]; int atom = atoms[j]+mol.offsets[i];
Real4 p = oldPosq[atom]; Real4 p = oldPosq[atom];
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -304,6 +304,9 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -304,6 +304,9 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findBlockBoundsArgs.push_back(&numAtoms); findBlockBoundsArgs.push_back(&numAtoms);
findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer()); findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer());
findBlockBoundsArgs.push_back(context.getInvPeriodicBoxSizePointer()); findBlockBoundsArgs.push_back(context.getInvPeriodicBoxSizePointer());
findBlockBoundsArgs.push_back(context.getPeriodicBoxVecXPointer());
findBlockBoundsArgs.push_back(context.getPeriodicBoxVecYPointer());
findBlockBoundsArgs.push_back(context.getPeriodicBoxVecZPointer());
findBlockBoundsArgs.push_back(&context.getPosq().getDevicePointer()); findBlockBoundsArgs.push_back(&context.getPosq().getDevicePointer());
findBlockBoundsArgs.push_back(&blockCenter->getDevicePointer()); findBlockBoundsArgs.push_back(&blockCenter->getDevicePointer());
findBlockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer()); findBlockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer());
...@@ -322,6 +325,9 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -322,6 +325,9 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel = context.getKernel(interactingBlocksProgram, "findBlocksWithInteractions"); findInteractingBlocksKernel = context.getKernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractingBlocksArgs.push_back(context.getPeriodicBoxSizePointer()); findInteractingBlocksArgs.push_back(context.getPeriodicBoxSizePointer());
findInteractingBlocksArgs.push_back(context.getInvPeriodicBoxSizePointer()); findInteractingBlocksArgs.push_back(context.getInvPeriodicBoxSizePointer());
findInteractingBlocksArgs.push_back(context.getPeriodicBoxVecXPointer());
findInteractingBlocksArgs.push_back(context.getPeriodicBoxVecYPointer());
findInteractingBlocksArgs.push_back(context.getPeriodicBoxVecZPointer());
findInteractingBlocksArgs.push_back(&interactionCount->getDevicePointer()); findInteractingBlocksArgs.push_back(&interactionCount->getDevicePointer());
findInteractingBlocksArgs.push_back(&interactingTiles->getDevicePointer()); findInteractingBlocksArgs.push_back(&interactingTiles->getDevicePointer());
findInteractingBlocksArgs.push_back(&interactingAtoms->getDevicePointer()); findInteractingBlocksArgs.push_back(&interactingAtoms->getDevicePointer());
...@@ -390,10 +396,10 @@ void CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -390,10 +396,10 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles->getDevicePointer(); forceArgs[7] = &interactingTiles->getDevicePointer();
findInteractingBlocksArgs[3] = &interactingTiles->getDevicePointer(); findInteractingBlocksArgs[6] = &interactingTiles->getDevicePointer();
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[14] = &interactingAtoms->getDevicePointer(); forceArgs[17] = &interactingAtoms->getDevicePointer();
findInteractingBlocksArgs[4] = &interactingAtoms->getDevicePointer(); findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer();
if (context.getUseDoublePrecision()) { if (context.getUseDoublePrecision()) {
vector<double4> oldPositionsVec(numAtoms, make_double4(1e30, 1e30, 1e30, 0)); vector<double4> oldPositionsVec(numAtoms, make_double4(1e30, 1e30, 1e30, 0));
oldPositions->upload(oldPositionsVec); oldPositions->upload(oldPositionsVec);
...@@ -627,6 +633,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -627,6 +633,9 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
forceArgs.push_back(&interactionCount->getDevicePointer()); forceArgs.push_back(&interactionCount->getDevicePointer());
forceArgs.push_back(context.getPeriodicBoxSizePointer()); forceArgs.push_back(context.getPeriodicBoxSizePointer());
forceArgs.push_back(context.getInvPeriodicBoxSizePointer()); forceArgs.push_back(context.getInvPeriodicBoxSizePointer());
forceArgs.push_back(context.getPeriodicBoxVecXPointer());
forceArgs.push_back(context.getPeriodicBoxVecYPointer());
forceArgs.push_back(context.getPeriodicBoxVecZPointer());
forceArgs.push_back(&maxTiles); forceArgs.push_back(&maxTiles);
forceArgs.push_back(&blockCenter->getDevicePointer()); forceArgs.push_back(&blockCenter->getDevicePointer());
forceArgs.push_back(&blockBoundingBox->getDevicePointer()); forceArgs.push_back(&blockBoundingBox->getDevicePointer());
......
...@@ -17,7 +17,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -17,7 +17,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles, const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -56,9 +57,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -56,9 +57,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3 pos2 = localData[atom2].pos; real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z); real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -109,9 +108,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -109,9 +108,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3 pos2 = localData[atom2].pos; real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z); real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -254,12 +251,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -254,12 +251,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
pos1.x -= floor((pos1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
pos1.y -= floor((pos1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x].pos, blockCenterX)
pos1.z -= floor((pos1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].pos.x -= floor((localData[threadIdx.x].pos.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].pos.y -= floor((localData[threadIdx.x].pos.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].pos.z -= floor((localData[threadIdx.x].pos.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj; int atom2 = tbx+tj;
...@@ -306,9 +299,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -306,9 +299,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3 pos2 = localData[atom2].pos; real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z); real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -14,7 +14,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -14,7 +14,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -51,9 +52,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -51,9 +52,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real3 pos2 = localData[atom2].pos; real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z); real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -100,9 +99,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -100,9 +99,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real3 pos2 = localData[atom2].pos; real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z); real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -229,12 +226,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -229,12 +226,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
pos1.x -= floor((pos1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
pos1.y -= floor((pos1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[threadIdx.x].pos, blockCenterX)
pos1.z -= floor((pos1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
localData[threadIdx.x].pos.x -= floor((localData[threadIdx.x].pos.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
localData[threadIdx.x].pos.y -= floor((localData[threadIdx.x].pos.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[threadIdx.x].pos.z -= floor((localData[threadIdx.x].pos.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
unsigned int tj = tgx; unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj; int atom2 = tbx+tj;
...@@ -268,9 +261,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -268,9 +261,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
real3 pos2 = localData[atom2].pos; real3 pos2 = localData[atom2].pos;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z); real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -25,12 +25,10 @@ inline __device__ real4 delta(real4 vec1, real4 vec2) { ...@@ -25,12 +25,10 @@ inline __device__ real4 delta(real4 vec1, real4 vec2) {
* Compute the difference between two vectors, taking periodic boundary conditions into account * Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude. * and setting the fourth component to the squared magnitude.
*/ */
inline __device__ real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) { inline __device__ real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f); real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(result)
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
...@@ -69,7 +67,8 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) { ...@@ -69,7 +67,8 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors. * Compute forces on donors.
*/ */
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq, extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
extern __shared__ real4 posBuffer[]; extern __shared__ real4 posBuffer[];
real energy = 0; real energy = 0;
...@@ -116,7 +115,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f ...@@ -116,7 +115,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
real4 a1 = posBuffer[3*index]; real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1]; real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2]; real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize); real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) { if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif #endif
...@@ -157,7 +156,8 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f ...@@ -157,7 +156,8 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
* Compute forces on acceptors. * Compute forces on acceptors.
*/ */
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq, extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
extern __shared__ real4 posBuffer[]; extern __shared__ real4 posBuffer[];
real3 f1 = make_real3(0); real3 f1 = make_real3(0);
...@@ -203,7 +203,7 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_ ...@@ -203,7 +203,7 @@ extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict_
real4 d1 = posBuffer[3*index]; real4 d1 = posBuffer[3*index];
real4 d2 = posBuffer[3*index+1]; real4 d2 = posBuffer[3*index+1];
real4 d3 = posBuffer[3*index+2]; real4 d3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize); real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) { if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif #endif
......
...@@ -18,12 +18,10 @@ inline __device__ real3 trim(real4 v) { ...@@ -18,12 +18,10 @@ inline __device__ real3 trim(real4 v) {
* Compute the difference between two vectors, taking periodic boundary conditions into account * Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude. * and setting the fourth component to the squared magnitude.
*/ */
inline __device__ real4 delta(real3 vec1, real3 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) { inline __device__ real4 delta(real3 vec1, real3 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f); real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(result)
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
...@@ -81,7 +79,7 @@ __constant__ float globals[NUM_GLOBALS]; ...@@ -81,7 +79,7 @@ __constant__ float globals[NUM_GLOBALS];
*/ */
extern "C" __global__ void computeInteraction( extern "C" __global__ void computeInteraction(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, const int* __restrict__ neighbors, const int* __restrict__ neighborStartIndex , const int* __restrict__ neighbors, const int* __restrict__ neighborStartIndex
#endif #endif
...@@ -144,16 +142,14 @@ extern "C" __global__ void computeInteraction( ...@@ -144,16 +142,14 @@ extern "C" __global__ void computeInteraction(
/** /**
* Find a bounding box for the atoms in each block. * Find a bounding box for the atoms in each block.
*/ */
extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ posq, extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ numNeighborPairs) { const real4* __restrict__ posq, real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ numNeighborPairs) {
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < NUM_ATOMS) { while (base < NUM_ATOMS) {
real4 pos = posq[base]; real4 pos = posq[base];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
#endif #endif
real4 minPos = pos; real4 minPos = pos;
real4 maxPos = pos; real4 maxPos = pos;
...@@ -162,9 +158,7 @@ extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPerio ...@@ -162,9 +158,7 @@ extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPerio
pos = posq[i]; pos = posq[i];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos); real4 center = 0.5f*(maxPos+minPos);
pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
minPos = make_real4(min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0); minPos = make_real4(min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0);
maxPos = make_real4(max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0); maxPos = make_real4(max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0);
...@@ -182,8 +176,8 @@ extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPerio ...@@ -182,8 +176,8 @@ extern "C" __global__ void findBlockBounds(real4 periodicBoxSize, real4 invPerio
/** /**
* Find a list of neighbors for each atom. * Find a list of neighbors for each atom.
*/ */
extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ posq, extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
const real4* __restrict__ blockCenter, const real4* __restrict__ blockBoundingBox, int2* __restrict__ neighborPairs, const real4* __restrict__ posq, const real4* __restrict__ blockCenter, const real4* __restrict__ blockBoundingBox, int2* __restrict__ neighborPairs,
int* __restrict__ numNeighborPairs, int* __restrict__ numNeighborsForAtom, int maxNeighborPairs int* __restrict__ numNeighborPairs, int* __restrict__ numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
, int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex , int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex
...@@ -216,9 +210,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi ...@@ -216,9 +210,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
real4 blockSize2 = blockBoundingBox[block2]; real4 blockSize2 = blockBoundingBox[block2];
real4 blockDelta = blockCenter1-blockCenter2; real4 blockDelta = blockCenter1-blockCenter2;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
blockDelta.x -= floor(blockDelta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(blockDelta)
blockDelta.y -= floor(blockDelta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
blockDelta.z -= floor(blockDelta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSize1.x-blockSize2.x); blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSize1.x-blockSize2.x);
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSize1.y-blockSize2.y); blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSize1.y-blockSize2.y);
...@@ -247,7 +239,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi ...@@ -247,7 +239,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
// Decide whether to include this atom pair in the neighbor list. // Decide whether to include this atom pair in the neighbor list.
real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize); real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CENTRAL_PARTICLE #ifdef USE_CENTRAL_PARTICLE
bool includeAtom = (atom2 != atom1 && atom2 < NUM_ATOMS && atomDelta.w < CUTOFF_SQUARED); bool includeAtom = (atom2 != atom1 && atom2 < NUM_ATOMS && atomDelta.w < CUTOFF_SQUARED);
#else #else
......
...@@ -10,7 +10,7 @@ typedef struct { ...@@ -10,7 +10,7 @@ typedef struct {
extern "C" __global__ void computeInteractionGroups( extern "C" __global__ void computeInteractionGroups(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ groupData, unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ groupData,
real4 periodicBoxSize, real4 invPeriodicBoxSize real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex
...@@ -47,9 +47,7 @@ extern "C" __global__ void computeInteractionGroups( ...@@ -47,9 +47,7 @@ extern "C" __global__ void computeInteractionGroups(
posq2 = make_real4(localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q); posq2 = make_real4(localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z); real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -4,16 +4,15 @@ ...@@ -4,16 +4,15 @@
/** /**
* Find a bounding box for the atoms in each block. * Find a bounding box for the atoms in each block.
*/ */
extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, const real4* __restrict__ posq, extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ rebuildNeighborList, real2* __restrict__ sortedBlocks) { const real4* __restrict__ posq, real4* __restrict__ blockCenter, real4* __restrict__ blockBoundingBox, int* __restrict__ rebuildNeighborList,
real2* __restrict__ sortedBlocks) {
int index = blockIdx.x*blockDim.x+threadIdx.x; int index = blockIdx.x*blockDim.x+threadIdx.x;
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < numAtoms) { while (base < numAtoms) {
real4 pos = posq[base]; real4 pos = posq[base];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
#endif #endif
real4 minPos = pos; real4 minPos = pos;
real4 maxPos = pos; real4 maxPos = pos;
...@@ -22,9 +21,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize, ...@@ -22,9 +21,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
pos = posq[i]; pos = posq[i];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos); real4 center = 0.5f*(maxPos+minPos);
pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
minPos = make_real4(min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0); minPos = make_real4(min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0);
maxPos = make_real4(max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0); maxPos = make_real4(max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0);
...@@ -116,11 +113,11 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co ...@@ -116,11 +113,11 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
* [in] rebuildNeighbourList - whether or not to execute this kernel * [in] rebuildNeighbourList - whether or not to execute this kernel
* *
*/ */
extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int* __restrict__ interactionCount, extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startBlockIndex, unsigned int* __restrict__ interactionCount, int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq,
unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter, const real4* __restrict__ sortedBlockBoundingBox, unsigned int maxTiles, unsigned int startBlockIndex, unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter,
const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, real4* __restrict__ oldPositions, const real4* __restrict__ sortedBlockBoundingBox, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
const int* __restrict__ rebuildNeighborList) { real4* __restrict__ oldPositions, const int* __restrict__ rebuildNeighborList) {
if (rebuildNeighborList[0] == 0) if (rebuildNeighborList[0] == 0)
return; // The neighbor list doesn't need to be rebuilt. return; // The neighbor list doesn't need to be rebuilt.
...@@ -157,9 +154,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -157,9 +154,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
// The box is small enough that we can just translate all the atoms into a single periodic // The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
pos1.x -= floor((pos1.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos1, blockCenterX)
pos1.y -= floor((pos1.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos1.z -= floor((pos1.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
} }
#endif #endif
posBuffer[threadIdx.x] = pos1; posBuffer[threadIdx.x] = pos1;
...@@ -185,9 +180,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -185,9 +180,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
real4 blockSizeY = (block2 < NUM_BLOCKS ? sortedBlockBoundingBox[block2] : make_real4(0)); real4 blockSizeY = (block2 < NUM_BLOCKS ? sortedBlockBoundingBox[block2] : make_real4(0));
real4 blockDelta = blockCenterX-blockCenterY; real4 blockDelta = blockCenterX-blockCenterY;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
blockDelta.x -= floor(blockDelta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(blockDelta)
blockDelta.y -= floor(blockDelta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
blockDelta.z -= floor(blockDelta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x); blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x);
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y); blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
...@@ -215,9 +208,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -215,9 +208,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
real3 pos2 = trimTo3(posq[atom2]); real3 pos2 = trimTo3(posq[atom2]);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
if (singlePeriodicCopy) { if (singlePeriodicCopy) {
pos2.x -= floor((pos2.x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos2, blockCenterX)
pos2.y -= floor((pos2.y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos2.z -= floor((pos2.z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
} }
#endif #endif
bool interacts = false; bool interacts = false;
...@@ -226,9 +217,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -226,9 +217,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
if (!singlePeriodicCopy) { if (!singlePeriodicCopy) {
for (int j = 0; j < TILE_SIZE; j++) { for (int j = 0; j < TILE_SIZE; j++) {
real3 delta = pos2-posBuffer[warpStart+j]; real3 delta = pos2-posBuffer[warpStart+j];
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED); interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment