Commit 641389f7 authored by peastman's avatar peastman
Browse files

Merge pull request #193 from peastman/master

Minor optimizations to CPU platform: avoid applying periodic boundary conditions unnecessarily
parents eda69200 79f064af
......@@ -122,7 +122,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, std::vector<RealVec>& atomCoordinates,
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
std::vector<RealVec>& forces, float* totalEnergy) const;
......@@ -132,6 +132,7 @@ class CpuNonbondedForce {
@param numberOfAtoms number of atoms
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (periodic boundary conditions not applied)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
......@@ -141,7 +142,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<std::pair<float, float> >& atomParameters,
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, float* forces, float* totalEnergy, ThreadPool& threads);
/**
......@@ -169,6 +170,7 @@ private:
// The following variables are used to make information accessible to the individual threads.
int numberOfAtoms;
float* posq;
RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters;
std::set<int> const* exclusions;
bool includeEnergy;
......
......@@ -203,11 +203,11 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
// Convert the positions to single precision.
if (periodic)
if (periodic || ewald || pme)
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
RealOpenMM x = posData[i][j];
double base = floor(x/boxSize[j]+0.5)*boxSize[j];
double base = floor(x/boxSize[j])*boxSize[j];
posq[4*i+j] = (float) (x-base);
}
else
......@@ -279,7 +279,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded.setUseSwitchingFunction(switchingDistance);
float nonbondedEnergy = 0;
if (includeDirect)
nonbonded.calculateDirectIxn(numParticles, &posq[0], particleParams, exclusions, &forces[0], includeEnergy ? &nonbondedEnergy : NULL, threads);
nonbonded.calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, &forces[0], includeEnergy ? &nonbondedEnergy : NULL, threads);
if (includeReciprocal) {
if (useOptimizedPme) {
PmeIO io(&posq[0], &forces[0], numParticles);
......
......@@ -177,14 +177,12 @@ public:
if (usePeriodic) {
endx = min(endx, centerVoxelIndex.x-dIndexX+nx-1);
endy = min(endy, centerVoxelIndex.y-dIndexY+ny-1);
numRanges = 2;
}
else {
startx = max(startx, 0);
starty = max(starty, 0);
endx = min(endx, nx-1);
endy = min(endy, ny-1);
numRanges = 1;
}
int lastSortedIndex = BlockSize*(blockIndex+1);
VoxelIndex voxelIndex(0, 0);
......@@ -204,10 +202,12 @@ public:
float dz = maxDistance+blockWidth[2];
dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
bool needPeriodic = (voxelIndex.x != x || voxelIndex.y != y || centerPos[2]-dz < 0.0f || centerPos[2]+dz > periodicBoxSize[2]);
int rangeStart[2];
int rangeEnd[2];
rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
if (usePeriodic) {
if (needPeriodic) {
numRanges = 2;
rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
if (rangeStart[0] > 0) {
rangeStart[1] = 0;
......@@ -218,8 +218,10 @@ public:
rangeEnd[1] = bins[voxelIndex.x][voxelIndex.y].size();
}
}
else
else {
numRanges = 1;
rangeEnd[0] = findUpperBound(voxelIndex.x, voxelIndex.y, centerPos[2]+dz);
}
// Loop over atoms and check to see if they are neighbors of this block.
......@@ -233,7 +235,7 @@ public:
fvec4 atomPos(atomLocations+4*sortedAtoms[sortedIndex]);
fvec4 delta = atomPos-centerPos;
if (usePeriodic) {
if (needPeriodic) {
fvec4 base = round(delta*invBoxSize)*boxSize;
delta = delta-base;
}
......@@ -250,7 +252,7 @@ public:
for (int k = 0; k < (int) blockAtoms.size(); k++) {
fvec4 pos1(&atomLocations[4*blockAtoms[k]]);
delta = atomPos-pos1;
if (usePeriodic) {
if (needPeriodic) {
fvec4 base = round(delta*invBoxSize)*boxSize;
delta = delta-base;
}
......
......@@ -177,7 +177,7 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() {
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates,
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
vector<RealVec>& forces, float* totalEnergy) const {
typedef std::complex<float> d_complex;
......@@ -291,12 +291,13 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
}
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<pair<float, float> >& atomParameters,
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, float* forces, float* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
this->posq = posq;
this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0];
this->exclusions = &exclusions[0];
includeEnergy = (totalEnergy != NULL);
......@@ -348,13 +349,13 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
for (int i = threadIndex; i < numberOfAtoms; i += numThreads)
for (int i = threadIndex; i < numberOfAtoms; i += numThreads) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) {
int j = *iter;
fvec4 deltaR;
fvec4 posI(posq+4*i);
fvec4 posJ(posq+4*j);
fvec4 posJ((float) atomCoordinates[j][0], (float) atomCoordinates[j][1], (float) atomCoordinates[j][2], 0.0f);
float r2;
getDeltaR(posJ, posI, deltaR, r2, false, boxSize, invBoxSize);
float r = sqrtf(r2);
......@@ -372,6 +373,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
}
}
}
}
else if (cutoff) {
// Compute the interactions from the neighbor list.
......@@ -460,6 +462,15 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = false;
if (periodic) {
for (int i = 0; i < 4 && !needPeriodic; i++)
for (int j = 0; j < 3; j++)
if (blockAtomPosq[i][j]-cutoffDistance < 0.0 || blockAtomPosq[i][j]+cutoffDistance > boxSize[j]) {
needPeriodic = true;
break;
}
}
// Loop over neighbors for this block.
......@@ -476,7 +487,7 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
bool any = false;
fvec4 dx, dy, dz, r2;
getDeltaR(atomPosq, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
getDeltaR(atomPosq, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
for (int j = 0; j < 4; j++) {
include[j] = (((exclusions[i]>>j)&1) == 0 && (!cutoff || r2[j] < cutoffDistance*cutoffDistance));
any |= include[j];
......@@ -561,6 +572,13 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
bool needPeriodic = false;
for (int i = 0; i < 4 && !needPeriodic; i++)
for (int j = 0; j < 3; j++)
if (blockAtomPosq[i][j]-cutoffDistance < 0.0 || blockAtomPosq[i][j]+cutoffDistance > boxSize[j]) {
needPeriodic = true;
break;
}
// Loop over neighbors for this block.
......@@ -577,7 +595,7 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
bool any = false;
fvec4 dx, dy, dz, r2;
getDeltaR(atomPosq, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
getDeltaR(atomPosq, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, needPeriodic, boxSize, invBoxSize);
for (int j = 0; j < 4; j++) {
include[j] = (((exclusions[i]>>j)&1) == 0 && r2[j] < cutoffDistance*cutoffDistance);
any |= include[j];
......
......@@ -397,44 +397,44 @@ void testLargeSystem() {
system.addForce(bonds);
VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01);
Context cuContext(system, integrator1, platform);
Context cpuContext(system, integrator1, platform);
Context referenceContext(system, integrator2, reference);
cuContext.setPositions(positions);
cuContext.setVelocities(velocities);
cpuContext.setPositions(positions);
cpuContext.setVelocities(velocities);
referenceContext.setPositions(positions);
referenceContext.setVelocities(velocities);
State cuState = cuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
State cpuState = cpuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
State referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(cuState.getPositions()[i], referenceState.getPositions()[i], tol);
ASSERT_EQUAL_VEC(cuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol);
ASSERT_EQUAL_VEC(cpuState.getPositions()[i], referenceState.getPositions()[i], tol);
ASSERT_EQUAL_VEC(cpuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
}
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
// Now do the same thing with periodic boundary conditions.
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
cuContext.reinitialize();
cpuContext.reinitialize();
referenceContext.reinitialize();
cuContext.setPositions(positions);
cuContext.setVelocities(velocities);
cpuContext.setPositions(positions);
cpuContext.setVelocities(velocities);
referenceContext.setPositions(positions);
referenceContext.setVelocities(velocities);
cuState = cuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
cpuState = cpuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) {
double dx = cuState.getPositions()[i][0]-referenceState.getPositions()[i][0];
double dy = cuState.getPositions()[i][1]-referenceState.getPositions()[i][1];
double dz = cuState.getPositions()[i][2]-referenceState.getPositions()[i][2];
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
ASSERT_EQUAL_VEC(cuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol);
double dx = cpuState.getPositions()[i][0]-referenceState.getPositions()[i][0];
double dy = cpuState.getPositions()[i][1]-referenceState.getPositions()[i][1];
double dz = cpuState.getPositions()[i][2]-referenceState.getPositions()[i][2];
ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
ASSERT_EQUAL_VEC(cpuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
}
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
}
void testDispersionCorrection() {
......@@ -542,15 +542,15 @@ void testChangingParameters() {
VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01);
Context cuContext(system, integrator1, platform);
Context cpuContext(system, integrator1, platform);
Context referenceContext(system, integrator2, reference);
cuContext.setPositions(positions);
cpuContext.setPositions(positions);
referenceContext.setPositions(positions);
State cuState = cuContext.getState(State::Forces | State::Energy);
State cpuState = cpuContext.getState(State::Forces | State::Energy);
State referenceState = referenceContext.getState(State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol);
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
// Now modify parameters and see if they still agree.
......@@ -559,13 +559,13 @@ void testChangingParameters() {
nonbonded->getParticleParameters(i, charge, sigma, epsilon);
nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
}
nonbonded->updateParametersInContext(cuContext);
nonbonded->updateParametersInContext(cpuContext);
nonbonded->updateParametersInContext(referenceContext);
cuState = cuContext.getState(State::Forces | State::Energy);
cpuState = cpuContext.getState(State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(cuState.getForces()[i], referenceState.getForces()[i], tol);
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
}
void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
......
......@@ -75,9 +75,9 @@ pme_init(pme_t * ppme,
*/
int
pme_exec(pme_t pme,
std::vector<OpenMM::RealVec>& atomCoordinates,
const std::vector<OpenMM::RealVec>& atomCoordinates,
std::vector<OpenMM::RealVec>& forces,
std::vector<RealOpenMM>& charges,
const std::vector<RealOpenMM>& charges,
const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy,
RealOpenMM pme_virial[3][3]);
......
......@@ -195,7 +195,7 @@ pme_calculate_bsplines_moduli(pme_t pme)
static void
pme_update_grid_index_and_fraction(pme_t pme,
vector<RealVec>& atomCoordinates,
const vector<RealVec>& atomCoordinates,
const RealOpenMM periodicBoxSize[3])
{
int i;
......@@ -317,7 +317,7 @@ pme_update_bsplines(pme_t pme)
static void
pme_grid_spread_charge(pme_t pme, vector<RealOpenMM>& charges)
pme_grid_spread_charge(pme_t pme, const vector<RealOpenMM>& charges)
{
int order;
int i;
......@@ -521,7 +521,7 @@ pme_reciprocal_convolution(pme_t pme,
static void
pme_grid_interpolate_force(pme_t pme,
const RealOpenMM periodicBoxSize[3],
vector<RealOpenMM>& charges,
const vector<RealOpenMM>& charges,
vector<RealVec>& forces)
{
int i;
......@@ -666,11 +666,11 @@ pme_init(pme_t * ppme,
int pme_exec(pme_t pme,
vector<RealVec>& atomCoordinates,
const vector<RealVec>& atomCoordinates,
vector<RealVec>& forces,
vector<RealOpenMM>& charges,
const vector<RealOpenMM>& charges,
const RealOpenMM periodicBoxSize[3],
RealOpenMM * energy,
RealOpenMM* energy,
RealOpenMM pme_virial[3][3])
{
/* Routine is called with coordinates in x, a box, and charges in q */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment