Commit 047934e2 authored by Rafal P. Wiewiora's avatar Rafal P. Wiewiora
Browse files

Merge remote-tracking branch 'upstream/master'

parents ce3a5dc0 d12c9bd1
/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -37,16 +37,6 @@
using namespace OpenMM;
using namespace std;
class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomManyParticleForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomManyParticleForce& owner;
};
CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
numParticles = force.getNumParticles();
......@@ -98,7 +88,7 @@ CpuCustomManyParticleForce::~CpuCustomManyParticleForce() {
delete threadData[i];
}
void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpenMM** particleParameters,
void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, double** particleParameters,
const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
bool includeForces, bool includeEnergy, double& energy) {
// Record the parameters for the threads.
......@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpe
// Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads();
// Combine the energies from all the threads.
......@@ -191,14 +180,14 @@ void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int thr
}
}
void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
void CpuCustomManyParticleForce::setUseCutoff(double distance) {
useCutoff = true;
cutoffDistance = distance;
if (neighborList == NULL)
neighborList = new CpuNeighborList(4);
}
void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) {
void CpuCustomManyParticleForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(useCutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
......@@ -220,7 +209,7 @@ void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) {
}
void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availableParticles, vector<int>& particleSet, int loopIndex, int startIndex,
RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
int numParticles = availableParticles.size();
double cutoff2 = cutoffDistance*cutoffDistance;
int checkRange = (centralParticleMode ? 1 : loopIndex);
......@@ -254,7 +243,7 @@ void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availablePart
}
}
void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
// Select the ordering to use for the particles.
vector<int>& permutedParticles = data.permutedParticles;
......
/* Portions copyright (c) 2009-2016 Stanford University and Simbios.
/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -33,16 +33,6 @@
using namespace OpenMM;
using namespace std;
class CpuCustomNonbondedForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomNonbondedForce& owner;
};
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
......@@ -70,7 +60,7 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) :
cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions));
}
......@@ -80,13 +70,14 @@ CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
delete threadData[i];
}
void CpuCustomNonbondedForce::setUseCutoff(RealOpenMM distance, const CpuNeighborList& neighbors) {
void CpuCustomNonbondedForce::setUseCutoff(double distance, const CpuNeighborList& neighbors) {
cutoff = true;
cutoffDistance = distance;
neighborList = &neighbors;
}
void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) {
useInteractionGroups = true;
for (int group = 0; group < (int) groups.size(); group++) {
const set<int>& set1 = groups[group].first;
const set<int>& set2 = groups[group].second;
......@@ -102,12 +93,12 @@ void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, s
}
}
void CpuCustomNonbondedForce::setUseSwitchingFunction(RealOpenMM distance) {
void CpuCustomNonbondedForce::setUseSwitchingFunction(double distance) {
useSwitch = true;
switchingDistance = distance;
}
void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
void CpuCustomNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
......@@ -129,9 +120,9 @@ void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
}
void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates, RealOpenMM** atomParameters,
RealOpenMM* fixedParameters, const map<string, double>& globalParameters,
vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<Vec3>& atomCoordinates, double** atomParameters,
double* fixedParameters, const map<string, double>& globalParameters,
vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
......@@ -149,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
// Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads();
// Combine the energies from all the threads.
......@@ -183,7 +173,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
data.energyParamDerivs[i] = 0.0;
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (groupInteractions.size() > 0) {
if (useInteractionGroups) {
// The user has specified interaction groups, so compute only the requested interactions.
while (true) {
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -37,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuGBSAOBCForce& owner;
};
CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
logDXInv = 1.0f/logDX;
......@@ -89,6 +78,10 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f
particleParams = params;
bornRadii.resize(params.size()+3);
obcChain.resize(params.size()+3);
for (int i = bornRadii.size()-3; i < bornRadii.size(); i++) {
bornRadii[i] = 0;
obcChain[i] = 0;
}
}
void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
......@@ -107,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align
// Signal the threads to start running and wait for them to finish.
ComputeTask task(*this);
gmx_atomic_set(&counter, 0);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads(); // Compute Born radii
gmx_atomic_set(&counter, 0);
threads.resumeThreads();
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Portions copyright (c) 2016-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -44,17 +44,6 @@
using namespace OpenMM;
using namespace std;
class CpuGayBerneForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGayBerneForce& owner, CpuNeighborList* neighborList) : owner(owner), neighborList(neighborList) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex, neighborList);
}
CpuGayBerneForce& owner;
CpuNeighborList* neighborList;
};
CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
// Record the force parameters.
......@@ -111,7 +100,7 @@ const vector<set<int> >& CpuGayBerneForce::getExclusions() const {
return particleExclusions;
}
RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, std::vector<RealVec>& forces, std::vector<AlignedArray<float> >& threadForce, RealVec* boxVectors, CpuPlatform::PlatformData& data) {
double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vector<Vec3>& forces, std::vector<AlignedArray<float> >& threadForce, Vec3* boxVectors, CpuPlatform::PlatformData& data) {
if (nonbondedMethod == GayBerneForce::CutoffPeriodic) {
double minAllowedSize = 1.999999*cutoffDistance;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
......@@ -137,8 +126,7 @@ RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, st
// Signal the threads to compute the pairwise interactions.
ComputeTask task(*this, data.neighborList);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex, data.neighborList); });
threads.waitForThreads();
// Signal the threads to compute exceptions.
......@@ -164,10 +152,10 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
int numThreads = threads.getNumThreads();
threadEnergy[threadIndex] = 0;
float* forces = &(*threadForce)[threadIndex][0];
vector<RealVec>& torques = threadTorque[threadIndex];
vector<Vec3>& torques = threadTorque[threadIndex];
torques.resize(numParticles);
for (int i = 0; i < numParticles; i++)
torques[i] = RealVec();
torques[i] = Vec3();
double energy = 0.0;
// Compute this thread's subset of interactions.
......@@ -184,8 +172,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
continue;
if (particleExclusions[i].find(j) != particleExclusions[i].end())
continue; // This interaction will be handled by an exception.
RealOpenMM sigma = particles[i].sigmaOver2+particles[j].sigmaOver2;
RealOpenMM epsilon = particles[i].sqrtEpsilon*particles[j].sqrtEpsilon;
double sigma = particles[i].sigmaOver2+particles[j].sigmaOver2;
double epsilon = particles[i].sqrtEpsilon*particles[j].sqrtEpsilon;
energy += computeOneInteraction(i, j, sigma, epsilon, positions, forces, torques, boxVectors);
}
}
......@@ -208,8 +196,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
int second = blockAtom[k];
if (particles[second].sqrtEpsilon == 0.0f)
continue;
RealOpenMM sigma = particles[first].sigmaOver2+particles[second].sigmaOver2;
RealOpenMM epsilon = particles[first].sqrtEpsilon*particles[second].sqrtEpsilon;
double sigma = particles[first].sigmaOver2+particles[second].sigmaOver2;
double epsilon = particles[first].sqrtEpsilon*particles[second].sqrtEpsilon;
energy += computeOneInteraction(first, second, sigma, epsilon, positions, forces, torques, boxVectors);
}
}
......@@ -235,39 +223,39 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
threadEnergy[threadIndex] = energy;
}
void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions) {
void CpuGayBerneForce::computeEllipsoidFrames(const vector<Vec3>& positions) {
int numParticles = particles.size();
for (int particle = 0; particle < numParticles; particle++) {
ParticleInfo& p = particles[particle];
// Compute the local coordinate system of the ellipsoid;
RealVec xdir, ydir, zdir;
Vec3 xdir, ydir, zdir;
if (p.xparticle == -1) {
xdir = RealVec(1, 0, 0);
ydir = RealVec(0, 1, 0);
xdir = Vec3(1, 0, 0);
ydir = Vec3(0, 1, 0);
}
else {
xdir = positions[particle]-positions[p.xparticle];
xdir /= SQRT(xdir.dot(xdir));
xdir /= sqrt(xdir.dot(xdir));
if (p.yparticle == -1) {
if (xdir[1] > -0.5 && xdir[1] < 0.5)
ydir = RealVec(0, 1, 0);
ydir = Vec3(0, 1, 0);
else
ydir = RealVec(1, 0, 0);
ydir = Vec3(1, 0, 0);
}
else
ydir = positions[particle]-positions[p.yparticle];
ydir -= xdir*(xdir.dot(ydir));
ydir /= SQRT(ydir.dot(ydir));
ydir /= sqrt(ydir.dot(ydir));
}
zdir = xdir.cross(ydir);
// Compute matrices we will need later.
RealOpenMM (&a)[3][3] = A[particle].v;
RealOpenMM (&b)[3][3] = B[particle].v;
RealOpenMM (&g)[3][3] = G[particle].v;
double (&a)[3][3] = A[particle].v;
double (&b)[3][3] = B[particle].v;
double (&g)[3][3] = G[particle].v;
a[0][0] = xdir[0];
a[0][1] = xdir[1];
a[0][2] = xdir[2];
......@@ -277,8 +265,8 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions)
a[2][0] = zdir[0];
a[2][1] = zdir[1];
a[2][2] = zdir[2];
RealVec r2(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz);
RealVec e2(1/sqrt(p.ex), 1/sqrt(p.ey), 1/sqrt(p.ez));
Vec3 r2(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz);
Vec3 e2(1/sqrt(p.ex), 1/sqrt(p.ey), 1/sqrt(p.ez));
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) {
b[i][j] = 0;
......@@ -291,33 +279,33 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions)
}
}
void CpuGayBerneForce::applyTorques(const vector<RealVec>& positions, vector<RealVec>& forces) {
void CpuGayBerneForce::applyTorques(const vector<Vec3>& positions, vector<Vec3>& forces) {
int numParticles = particles.size();
int numThreads = threadTorque.size();
for (int particle = 0; particle < numParticles; particle++) {
ParticleInfo& p = particles[particle];
RealVec pos = positions[particle];
Vec3 pos = positions[particle];
if (p.xparticle != -1) {
// Add up the torques from the individual threads.
RealVec torque;
Vec3 torque;
for (int i = 0; i < numThreads; i++)
torque += threadTorque[i][particle];
// Apply a force to the x particle.
RealVec dx = positions[p.xparticle]-pos;
Vec3 dx = positions[p.xparticle]-pos;
double dx2 = dx.dot(dx);
RealVec f = torque.cross(dx)/dx2;
Vec3 f = torque.cross(dx)/dx2;
forces[p.xparticle] += f;
forces[particle] -= f;
if (p.yparticle != -1) {
// Apply a force to the y particle. This is based on the component of the torque
// that was not already applied to the x particle.
RealVec dy = positions[p.yparticle]-pos;
Vec3 dy = positions[p.yparticle]-pos;
double dy2 = dy.dot(dy);
RealVec torque2 = dx*(torque.dot(dx)/dx2);
Vec3 torque2 = dx*(torque.dot(dx)/dx2);
f = torque2.cross(dy)/dy2;
forces[p.yparticle] += f;
forces[particle] -= f;
......@@ -326,27 +314,27 @@ void CpuGayBerneForce::applyTorques(const vector<RealVec>& positions, vector<Rea
}
}
RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, RealOpenMM sigma, RealOpenMM epsilon, const RealVec* positions,
float* forces, vector<RealVec>& torques, const RealVec* boxVectors) {
double CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, double sigma, double epsilon, const Vec3* positions,
float* forces, vector<Vec3>& torques, const Vec3* boxVectors) {
// Compute the displacement and check against the cutoff.
RealOpenMM deltaR[ReferenceForce::LastDeltaRIndex];
double deltaR[ReferenceForce::LastDeltaRIndex];
if (nonbondedMethod == GayBerneForce::CutoffPeriodic)
ReferenceForce::getDeltaRPeriodic(positions[particle2], positions[particle1], boxVectors, deltaR);
else
ReferenceForce::getDeltaR(positions[particle2], positions[particle1], deltaR);
RealOpenMM r = deltaR[ReferenceForce::RIndex];
double r = deltaR[ReferenceForce::RIndex];
if (nonbondedMethod != GayBerneForce::NoCutoff && r >= cutoffDistance)
return 0;
RealOpenMM rInv = 1/r;
RealVec dr(deltaR[ReferenceForce::XIndex], deltaR[ReferenceForce::YIndex], deltaR[ReferenceForce::ZIndex]);
RealVec drUnit = dr*rInv;
double rInv = 1/r;
Vec3 dr(deltaR[ReferenceForce::XIndex], deltaR[ReferenceForce::YIndex], deltaR[ReferenceForce::ZIndex]);
Vec3 drUnit = dr*rInv;
// Compute the switching function.
RealOpenMM switchValue = 1, switchDeriv = 0;
double switchValue = 1, switchDeriv = 0;
if (useSwitchingFunction && r > switchingDistance) {
RealOpenMM t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
double t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
switchValue = 1+t*t*t*(-10+t*(15-t*6));
switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
}
......@@ -354,11 +342,11 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
// Interactions between two point particles can be computed more easily.
if (particles[particle1].isPointParticle && particles[particle2].isPointParticle) {
RealOpenMM sig = sigma*rInv;
RealOpenMM sig2 = sig*sig;
RealOpenMM sig6 = sig2*sig2*sig2;
RealOpenMM energy = 4*epsilon*(sig6-1)*sig6;
RealVec force = drUnit*(switchValue*4*epsilon*(12*sig6 - 6)*sig6*rInv - energy*switchDeriv);
double sig = sigma*rInv;
double sig2 = sig*sig;
double sig6 = sig2*sig2*sig2;
double energy = 4*epsilon*(sig6-1)*sig6;
Vec3 force = drUnit*(switchValue*4*epsilon*(12*sig6 - 6)*sig6*rInv - energy*switchDeriv);
forces[4*particle1] += force[0];
forces[4*particle1+1] += force[1];
forces[4*particle1+2] += force[2];
......@@ -374,31 +362,31 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
Matrix G12 = G[particle1]+G[particle2];
Matrix B12inv = B12.inverse();
Matrix G12inv = G12.inverse();
RealOpenMM detG12 = G12.determinant();
double detG12 = G12.determinant();
// Estimate the distance between the ellipsoids and compute the first terms needed for the energy.
RealOpenMM sigma12 = 1/SQRT(0.5*drUnit.dot(G12inv*drUnit));
RealOpenMM h12 = r - sigma12;
RealOpenMM rho = sigma/(h12+sigma);
RealOpenMM rho2 = rho*rho;
RealOpenMM rho6 = rho2*rho2*rho2;
RealOpenMM u = 4*epsilon*(rho6*rho6-rho6);
RealOpenMM eta = SQRT(2*s[particle1]*s[particle2]/detG12);
RealOpenMM chi = 2*drUnit.dot(B12inv*drUnit);
double sigma12 = 1/sqrt(0.5*drUnit.dot(G12inv*drUnit));
double h12 = r - sigma12;
double rho = sigma/(h12+sigma);
double rho2 = rho*rho;
double rho6 = rho2*rho2*rho2;
double u = 4*epsilon*(rho6*rho6-rho6);
double eta = sqrt(2*s[particle1]*s[particle2]/detG12);
double chi = 2*drUnit.dot(B12inv*drUnit);
chi *= chi;
RealOpenMM energy = u*eta*chi;
double energy = u*eta*chi;
// Compute the terms needed for the force.
RealVec kappa = G12inv*dr;
RealVec iota = B12inv*dr;
RealOpenMM rInv2 = rInv*rInv;
RealOpenMM dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma;
RealOpenMM temp = 0.5*sigma12*sigma12*sigma12*rInv2;
RealVec dudr = (drUnit + (kappa-drUnit*kappa.dot(drUnit))*temp)*dUSLJdr;
RealVec dchidr = (iota-drUnit*iota.dot(drUnit))*(-8*rInv2*SQRT(chi));
RealVec force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv);
Vec3 kappa = G12inv*dr;
Vec3 iota = B12inv*dr;
double rInv2 = rInv*rInv;
double dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma;
double temp = 0.5*sigma12*sigma12*sigma12*rInv2;
Vec3 dudr = (drUnit + (kappa-drUnit*kappa.dot(drUnit))*temp)*dUSLJdr;
Vec3 dchidr = (iota-drUnit*iota.dot(drUnit))*(-8*rInv2*sqrt(chi));
Vec3 force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv);
forces[4*particle1] += force[0];
forces[4*particle1+1] += force[1];
forces[4*particle1+2] += force[2];
......@@ -413,13 +401,13 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
ParticleInfo& p = particles[particle];
if (p.isPointParticle)
continue;
RealVec dudq = (kappa*G[particle]).cross(kappa*(temp*dUSLJdr));
RealVec dchidq = (iota*B[particle]).cross(iota)*(-4*rInv2);
RealOpenMM (&g12)[3][3] = G12.v;
RealOpenMM (&a)[3][3] = A[particle].v;
RealVec scale = RealVec(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz)*(-0.5*eta/detG12);
Vec3 dudq = (kappa*G[particle]).cross(kappa*(temp*dUSLJdr));
Vec3 dchidq = (iota*B[particle]).cross(iota)*(-4*rInv2);
double (&g12)[3][3] = G12.v;
double (&a)[3][3] = A[particle].v;
Vec3 scale = Vec3(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz)*(-0.5*eta/detG12);
Matrix D;
RealOpenMM (&d)[3][3] = D.v;
double (&d)[3][3] = D.v;
d[0][0] = scale[0]*(2*a[0][0]*(g12[1][1]*g12[2][2] - g12[1][2]*g12[2][1]) +
a[0][2]*(g12[1][2]*g12[0][1] + g12[1][0]*g12[2][1] - g12[1][1]*(g12[0][2] + g12[2][0])) +
a[0][1]*(g12[0][2]*g12[2][1] + g12[2][0]*g12[1][2] - g12[2][2]*(g12[0][1] + g12[1][0])));
......@@ -447,10 +435,10 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
d[2][2] = scale[2]*( a[2][0]*(g12[0][1]*g12[1][2] + g12[2][1]*g12[1][0] - g12[1][1]*(g12[0][2] + g12[2][0])) +
a[2][1]*(g12[1][0]*g12[0][2] + g12[2][0]*g12[0][1] - g12[0][0]*(g12[1][2] + g12[2][1])) +
2*a[2][2]*(g12[1][1]*g12[0][0] - g12[1][0]*g12[0][1]));
RealVec detadq;
Vec3 detadq;
for (int i = 0; i < 3; i++)
detadq += RealVec(a[i][0], a[i][1], a[i][2]).cross(RealVec(d[i][0], d[i][1], d[i][2]));
RealVec torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
detadq += Vec3(a[i][0], a[i][1], a[i][2]).cross(Vec3(d[i][0], d[i][1], d[i][2]));
Vec3 torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
torques[particle] -= torque;
}
return switchValue*energy;
......
......@@ -41,43 +41,44 @@
#include "ReferenceTabulatedFunction.h"
#include "openmm/Context.h"
#include "openmm/OpenMMException.h"
#include "openmm/Vec3.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomNonbondedForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/vectorize.h"
#include "RealVec.h"
#include "lepton/CompiledExpression.h"
#include "lepton/CustomFunction.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h"
using namespace OpenMM;
using namespace std;
static vector<RealVec>& extractPositions(ContextImpl& context) {
static vector<Vec3>& extractPositions(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((vector<RealVec>*) data->positions);
return *((vector<Vec3>*) data->positions);
}
static vector<RealVec>& extractVelocities(ContextImpl& context) {
static vector<Vec3>& extractVelocities(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((vector<RealVec>*) data->velocities);
return *((vector<Vec3>*) data->velocities);
}
static vector<RealVec>& extractForces(ContextImpl& context) {
static vector<Vec3>& extractForces(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((vector<RealVec>*) data->forces);
return *((vector<Vec3>*) data->forces);
}
static RealVec& extractBoxSize(ContextImpl& context) {
static Vec3& extractBoxSize(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *(RealVec*) data->periodicBoxSize;
return *(Vec3*) data->periodicBoxSize;
}
static RealVec* extractBoxVectors(ContextImpl& context) {
static Vec3* extractBoxVectors(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return (RealVec*) data->periodicBoxVectors;
return (Vec3*) data->periodicBoxVectors;
}
static ReferenceConstraints& extractConstraints(ContextImpl& context) {
......@@ -106,14 +107,14 @@ static void validateVariables(const Lepton::ExpressionTreeNode& node, const set<
* for a leapfrog integrator.
*/
static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& masses, double timeShift) {
vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& velData = extractVelocities(context);
vector<RealVec>& forceData = extractForces(context);
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& velData = extractVelocities(context);
vector<Vec3>& forceData = extractForces(context);
int numParticles = context.getSystem().getNumParticles();
// Compute the shifted velocities.
vector<RealVec> shiftedVel(numParticles);
vector<Vec3> shiftedVel(numParticles);
for (int i = 0; i < numParticles; ++i) {
if (masses[i] > 0)
shiftedVel[i] = velData[i]+forceData[i]*(timeShift/masses[i]);
......@@ -137,40 +138,32 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy;
}
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task {
public:
SumForceTask(int numParticles, vector<RealVec>& forceData, CpuPlatform::PlatformData& data) : numParticles(numParticles), forceData(forceData), data(data) {
}
void execute(ThreadPool& threads, int threadIndex) {
// Sum the contributions to forces that have been calculated by different threads.
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
}
int numParticles;
vector<RealVec>& forceData;
CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
class CpuCalcForcesAndEnergyKernel::InitForceTask : public ThreadPool::Task {
public:
InitForceTask(int numParticles, ContextImpl& context, CpuPlatform::PlatformData& data) : numParticles(numParticles), positionsValid(true), context(context), data(data) {
}
void execute(ThreadPool& threads, int threadIndex) {
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
bool positionsValid = true;
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Convert the positions to single precision and apply periodic boundary conditions
AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context);
RealVec* boxVectors = extractBoxVectors(context);
vector<Vec3>& posData = extractPositions(context);
Vec3* boxVectors = extractBoxVectors(context);
double boxSize[3] = {boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]};
double invBoxSize[3] = {1/boxVectors[0][0], 1/boxVectors[1][1], 1/boxVectors[2][2]};
bool triclinic = (boxVectors[0][1] != 0 || boxVectors[0][2] != 0 || boxVectors[1][0] != 0 || boxVectors[1][2] != 0 || boxVectors[2][0] != 0 || boxVectors[2][1] != 0);
......@@ -181,7 +174,7 @@ public:
if (data.isPeriodic) {
if (triclinic) {
for (int i = start; i < end; i++) {
RealVec pos = posData[i];
Vec3 pos = posData[i];
pos -= boxVectors[2]*floor(pos[2]*invBoxSize[2]);
pos -= boxVectors[1]*floor(pos[1]*invBoxSize[1]);
pos -= boxVectors[0]*floor(pos[0]*invBoxSize[0]);
......@@ -193,7 +186,7 @@ public:
else {
for (int i = start; i < end; i++) {
for (int j = 0; j < 3; j++) {
RealOpenMM x = posData[i][j];
double x = posData[i][j];
double base = floor(x*invBoxSize[j])*boxSize[j];
posq[4*i+j] = (float) (x-base);
}
......@@ -218,36 +211,9 @@ public:
fvec4 zero(0.0f);
for (int j = 0; j < numParticles; j++)
zero.store(&data.threadForce[threadIndex][j*4]);
}
int numParticles;
bool positionsValid;
ContextImpl& context;
CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
InitForceTask task(numParticles, context, data);
data.threads.execute(task);
});
data.threads.waitForThreads();
if (!task.positionsValid)
if (!positionsValid)
throw OpenMMException("Particle coordinate is nan");
// Determine whether we need to recompute the neighbor list.
......@@ -259,9 +225,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double farCutoff2 = 0.5*padding*padding;
int maxNumMoved = numParticles/10;
vector<int> moved;
vector<RealVec>& posData = extractPositions(context);
vector<Vec3>& posData = extractPositions(context);
for (int i = 0; i < numParticles; i++) {
RealVec delta = posData[i]-lastPositions[i];
Vec3 delta = posData[i]-lastPositions[i];
double dist2 = delta.dot(delta);
if (dist2 > closeCutoff2) {
moved.push_back(i);
......@@ -280,11 +246,11 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double paddedCutoff2 = data.paddedCutoff*data.paddedCutoff;
for (int i = 1; i < numMoved && !needRecompute; i++)
for (int j = 0; j < i; j++) {
RealVec delta = posData[moved[i]]-posData[moved[j]];
Vec3 delta = posData[moved[i]]-posData[moved[j]];
if (delta.dot(delta) < cutoff2) {
// These particles should interact. See if they are in the neighbor list.
RealVec oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]];
Vec3 oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]];
if (oldDelta.dot(oldDelta) > paddedCutoff2) {
needRecompute = true;
break;
......@@ -302,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
// Sum the forces from all the threads.
SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data);
data.threads.execute(task);
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Sum the contributions to forces that have been calculated by different threads.
int numParticles = context.getSystem().getNumParticles();
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
vector<Vec3>& forceData = extractForces(context);
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
});
data.threads.waitForThreads();
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
}
......@@ -324,9 +305,9 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har
angleIndexArray = new int*[numAngles];
for (int i = 0; i < numAngles; i++)
angleIndexArray[i] = new int[3];
angleParamArray = new RealOpenMM*[numAngles];
angleParamArray = new double*[numAngles];
for (int i = 0; i < numAngles; i++)
angleParamArray[i] = new RealOpenMM[2];
angleParamArray[i] = new double[2];
for (int i = 0; i < numAngles; ++i) {
int particle1, particle2, particle3;
double angle, k;
......@@ -334,17 +315,17 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har
angleIndexArray[i][0] = particle1;
angleIndexArray[i][1] = particle2;
angleIndexArray[i][2] = particle3;
angleParamArray[i][0] = (RealOpenMM) angle;
angleParamArray[i][1] = (RealOpenMM) k;
angleParamArray[i][0] = angle;
angleParamArray[i][1] = k;
}
bondForce.initialize(system.getNumParticles(), numAngles, 3, angleIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions();
}
double CpuCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0;
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& forceData = extractForces(context);
double energy = 0;
ReferenceAngleBondIxn angleBond;
if (usePeriodic)
angleBond.setPeriodic(extractBoxVectors(context));
......@@ -364,8 +345,8 @@ void CpuCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& conte
force.getAngleParameters(i, particle1, particle2, particle3, angle, k);
if (particle1 != angleIndexArray[i][0] || particle2 != angleIndexArray[i][1] || particle3 != angleIndexArray[i][2])
throw OpenMMException("updateParametersInContext: The set of particles in an angle has changed");
angleParamArray[i][0] = (RealOpenMM) angle;
angleParamArray[i][1] = (RealOpenMM) k;
angleParamArray[i][0] = angle;
angleParamArray[i][1] = k;
}
}
......@@ -385,9 +366,9 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P
torsionIndexArray = new int*[numTorsions];
for (int i = 0; i < numTorsions; i++)
torsionIndexArray[i] = new int[4];
torsionParamArray = new RealOpenMM*[numTorsions];
torsionParamArray = new double*[numTorsions];
for (int i = 0; i < numTorsions; i++)
torsionParamArray[i] = new RealOpenMM[3];
torsionParamArray[i] = new double[3];
for (int i = 0; i < numTorsions; ++i) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
......@@ -396,18 +377,18 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P
torsionIndexArray[i][1] = particle2;
torsionIndexArray[i][2] = particle3;
torsionIndexArray[i][3] = particle4;
torsionParamArray[i][0] = (RealOpenMM) k;
torsionParamArray[i][1] = (RealOpenMM) phase;
torsionParamArray[i][2] = (RealOpenMM) periodicity;
torsionParamArray[i][0] = k;
torsionParamArray[i][1] = phase;
torsionParamArray[i][2] = periodicity;
}
bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions();
}
double CpuCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0;
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& forceData = extractForces(context);
double energy = 0;
ReferenceProperDihedralBond periodicTorsionBond;
if (usePeriodic)
periodicTorsionBond.setPeriodic(extractBoxVectors(context));
......@@ -427,9 +408,9 @@ void CpuCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& con
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, periodicity, phase, k);
if (particle1 != torsionIndexArray[i][0] || particle2 != torsionIndexArray[i][1] || particle3 != torsionIndexArray[i][2] || particle4 != torsionIndexArray[i][3])
throw OpenMMException("updateParametersInContext: The set of particles in a torsion has changed");
torsionParamArray[i][0] = (RealOpenMM) k;
torsionParamArray[i][1] = (RealOpenMM) phase;
torsionParamArray[i][2] = (RealOpenMM) periodicity;
torsionParamArray[i][0] = k;
torsionParamArray[i][1] = phase;
torsionParamArray[i][2] = periodicity;
}
}
......@@ -449,9 +430,9 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi
torsionIndexArray = new int*[numTorsions];
for (int i = 0; i < numTorsions; i++)
torsionIndexArray[i] = new int[4];
torsionParamArray = new RealOpenMM*[numTorsions];
torsionParamArray = new double*[numTorsions];
for (int i = 0; i < numTorsions; i++)
torsionParamArray[i] = new RealOpenMM[6];
torsionParamArray[i] = new double[6];
for (int i = 0; i < numTorsions; ++i) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
......@@ -460,21 +441,21 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi
torsionIndexArray[i][1] = particle2;
torsionIndexArray[i][2] = particle3;
torsionIndexArray[i][3] = particle4;
torsionParamArray[i][0] = (RealOpenMM) c0;
torsionParamArray[i][1] = (RealOpenMM) c1;
torsionParamArray[i][2] = (RealOpenMM) c2;
torsionParamArray[i][3] = (RealOpenMM) c3;
torsionParamArray[i][4] = (RealOpenMM) c4;
torsionParamArray[i][5] = (RealOpenMM) c5;
torsionParamArray[i][0] = c0;
torsionParamArray[i][1] = c1;
torsionParamArray[i][2] = c2;
torsionParamArray[i][3] = c3;
torsionParamArray[i][4] = c4;
torsionParamArray[i][5] = c5;
}
bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions();
}
double CpuCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0;
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& forceData = extractForces(context);
double energy = 0;
ReferenceRbDihedralBond rbTorsionBond;
if (usePeriodic)
rbTorsionBond.setPeriodic(extractBoxVectors(context));
......@@ -494,12 +475,12 @@ void CpuCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
if (particle1 != torsionIndexArray[i][0] || particle2 != torsionIndexArray[i][1] || particle3 != torsionIndexArray[i][2] || particle4 != torsionIndexArray[i][3])
throw OpenMMException("updateParametersInContext: The set of particles in a torsion has changed");
torsionParamArray[i][0] = (RealOpenMM) c0;
torsionParamArray[i][1] = (RealOpenMM) c1;
torsionParamArray[i][2] = (RealOpenMM) c2;
torsionParamArray[i][3] = (RealOpenMM) c3;
torsionParamArray[i][4] = (RealOpenMM) c4;
torsionParamArray[i][5] = (RealOpenMM) c5;
torsionParamArray[i][0] = c0;
torsionParamArray[i][1] = c1;
torsionParamArray[i][2] = c2;
torsionParamArray[i][3] = c3;
torsionParamArray[i][4] = c4;
torsionParamArray[i][5] = c5;
}
}
......@@ -528,7 +509,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) {
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8();
else
......@@ -575,12 +556,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge;
}
......@@ -592,9 +575,9 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
force.getExceptionParameters(nb14s[i], particle1, particle2, charge, radius, depth);
bonded14IndexArray[i][0] = particle1;
bonded14IndexArray[i][1] = particle2;
bonded14ParamArray[i][0] = static_cast<RealOpenMM>(radius);
bonded14ParamArray[i][1] = static_cast<RealOpenMM>(4.0*depth);
bonded14ParamArray[i][2] = static_cast<RealOpenMM>(charge);
bonded14ParamArray[i][0] = radius;
bonded14ParamArray[i][1] = 4.0*depth;
bonded14ParamArray[i][2] = charge;
}
bondForce.initialize(system.getNumParticles(), num14, 2, bonded14IndexArray, data.threads);
......@@ -616,19 +599,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
}
else if (nonbondedMethod == PME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = alpha;
useSwitchingFunction = false;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
else
if(nonbondedMethod == LJPME){
for (int atom = 0; atom < numParticles; atom++) {
// Dispersion self term
ewaldSelfEnergy += pow(ewaldDispersionAlpha, 6.0) * C6params[atom]*C6params[atom] / 12.0;
}
}
} else {
ewaldSelfEnergy = 0.0;
}
rfDielectric = force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME);
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
}
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
......@@ -646,18 +645,33 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
}
}
if (nonbondedMethod == LJPME) {
// If available, use the optimized PME implementation.
vector<string> kernelNames;
kernelNames.push_back("CalcPmeReciprocalForce");
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
}
}
}
AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context);
RealVec* boxVectors = extractBoxVectors(context);
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& forceData = extractForces(context);
Vec3* boxVectors = extractBoxVectors(context);
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) {
RealVec* boxVectors = extractBoxVectors(context);
Vec3* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
......@@ -669,9 +683,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded->setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance);
if (ljpme){
nonbonded->setUsePME(ewaldAlpha, gridSize);
nonbonded->setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
double nonbondedEnergy = 0;
if (includeDirect)
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) {
if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
......@@ -680,13 +698,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
}
else
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
}
energy += nonbondedEnergy;
if (includeDirect) {
ReferenceLJCoulomb14 nonbonded14;
bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic)
if (data.isPeriodic && nonbondedMethod != LJPME)
energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
}
return energy;
......@@ -726,9 +744,9 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
force.getExceptionParameters(nb14s[i], particle1, particle2, charge, radius, depth);
bonded14IndexArray[i][0] = particle1;
bonded14IndexArray[i][1] = particle2;
bonded14ParamArray[i][0] = static_cast<RealOpenMM>(radius);
bonded14ParamArray[i][1] = static_cast<RealOpenMM>(4.0*depth);
bonded14ParamArray[i][2] = static_cast<RealOpenMM>(charge);
bonded14ParamArray[i][0] = radius;
bonded14ParamArray[i][1] = 4.0*depth;
bonded14ParamArray[i][2] = charge;
}
// Recompute the coefficient for the dispersion correction.
......@@ -739,7 +757,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
}
void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
......@@ -751,6 +769,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
}
void CpuCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedDispersionPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = ewaldDispersionAlpha;
nx = dispersionGridSize[0];
ny = dispersionGridSize[1];
nz = dispersionGridSize[2];
}
}
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) {
}
......@@ -864,9 +895,9 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
}
double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context);
RealVec* boxVectors = extractBoxVectors(context);
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& forceData = extractForces(context);
Vec3* boxVectors = extractBoxVectors(context);
double energy = 0;
bool periodic = (nonbondedMethod == CutoffPeriodic);
if (nonbondedMethod != NoCutoff)
......@@ -953,7 +984,7 @@ void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCFo
double CpuCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (data.isPeriodic) {
RealVec& boxSize = extractBoxSize(context);
Vec3& boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
obc.setPeriodic(floatBoxSize);
}
......@@ -1024,14 +1055,14 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
vector<double> parameters;
force.getParticleParameters(i, parameters);
for (int j = 0; j < numPerParticleParameters; j++)
particleParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]);
particleParamArray[i][j] = parameters[j];
}
for (int i = 0; i < numPerParticleParameters; i++)
particleParameterNames.push_back(force.getPerParticleParameterName(i));
for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i));
nonbondedMethod = CalcCustomGBForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = (RealOpenMM) force.getCutoffDistance();
nonbondedCutoff = force.getCutoffDistance();
if (nonbondedMethod != NoCutoff)
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, force.getNumExclusions() > 0, exclusions);
......@@ -1133,9 +1164,9 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
}
double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0;
RealVec* boxVectors = extractBoxVectors(context);
vector<Vec3>& forceData = extractForces(context);
double energy = 0;
Vec3* boxVectors = extractBoxVectors(context);
if (data.isPeriodic)
ixn->setPeriodic(extractBoxSize(context));
if (nonbondedMethod != NoCutoff) {
......@@ -1165,7 +1196,7 @@ void CpuCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, c
vector<double> parameters;
force.getParticleParameters(i, parameters);
for (int j = 0; j < numParameters; j++)
particleParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]);
particleParamArray[i][j] = static_cast<double>(parameters[j]);
}
}
......@@ -1208,7 +1239,7 @@ double CpuCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
for (int i = 0; i < (int) globalParameterNames.size(); i++)
globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]);
if (nonbondedMethod == CutoffPeriodic) {
RealVec* boxVectors = extractBoxVectors(context);
Vec3* boxVectors = extractBoxVectors(context);
double minAllowedSize = 2*cutoffDistance;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
......@@ -1232,7 +1263,7 @@ void CpuCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImpl&
int type;
force.getParticleParameters(i, parameters, type);
for (int j = 0; j < numParameters; j++)
particleParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]);
particleParamArray[i][j] = static_cast<double>(parameters[j]);
}
}
......@@ -1269,7 +1300,7 @@ void CpuIntegrateLangevinStepKernel::initialize(const System& system, const Lang
int numParticles = system.getNumParticles();
masses.resize(numParticles);
for (int i = 0; i < numParticles; ++i)
masses[i] = static_cast<RealOpenMM>(system.getParticleMass(i));
masses[i] = static_cast<double>(system.getParticleMass(i));
data.random.initialize(integrator.getRandomNumberSeed(), data.threads.getNumThreads());
}
......@@ -1277,16 +1308,15 @@ void CpuIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langevi
double temperature = integrator.getTemperature();
double friction = integrator.getFriction();
double stepSize = integrator.getStepSize();
vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& velData = extractVelocities(context);
vector<RealVec>& forceData = extractForces(context);
vector<Vec3>& posData = extractPositions(context);
vector<Vec3>& velData = extractVelocities(context);
vector<Vec3>& forceData = extractForces(context);
if (dynamics == 0 || temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
// Recreate the computation objects with the new parameters.
if (dynamics)
delete dynamics;
RealOpenMM tau = (friction == 0.0 ? 0.0 : 1.0/friction);
dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, tau, temperature, data.threads, data.random);
dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, friction, temperature, data.threads, data.random);
dynamics->setReferenceConstraintAlgorithm(&extractConstraints(context));
prevTemp = temperature;
prevFriction = friction;
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
......@@ -29,45 +29,15 @@
using namespace OpenMM;
using namespace std;
class CpuLangevinDynamics::Update1Task : public ThreadPool::Task {
public:
Update1Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate1(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
public:
Update2Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate2(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update3Task : public ThreadPool::Task {
public:
Update3Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate3(threadIndex);
}
CpuLangevinDynamics& owner;
};
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, tau, temperature), threads(threads), random(random) {
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, double deltaT, double friction, double temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
}
CpuLangevinDynamics::~CpuLangevinDynamics() {
}
void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities,
vector<RealVec>& forces, vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) {
void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
vector<Vec3>& forces, vector<double>& inverseMasses, vector<Vec3>& xPrime) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
......@@ -79,13 +49,12 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update1Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate1(threadIndex); });
threads.waitForThreads();
}
void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities,
vector<RealVec>& forces, vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) {
void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
vector<Vec3>& forces, vector<double>& inverseMasses, vector<Vec3>& xPrime) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
......@@ -97,13 +66,12 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update2Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate2(threadIndex); });
threads.waitForThreads();
}
void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities,
vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) {
void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
vector<double>& inverseMasses, vector<Vec3>& xPrime) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
......@@ -114,44 +82,44 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish.
Update3Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate3(threadIndex); });
threads.waitForThreads();
}
void CpuLangevinDynamics::threadUpdate1(int threadIndex) {
const RealOpenMM tau = getTau();
const RealOpenMM vscale = EXP(-getDeltaT()/tau);
const RealOpenMM fscale = (1-vscale)*tau;
const RealOpenMM kT = BOLTZ*getTemperature();
const RealOpenMM noisescale = SQRT(2*kT/tau)*SQRT(0.5*(1-vscale*vscale)*tau);
double dt = getDeltaT();
double friction = getFriction();
const double vscale = exp(-dt*friction);
const double fscale = (friction == 0 ? dt : (1-vscale)/friction);
const double kT = BOLTZ*getTemperature();
const double noisescale = sqrt(kT*(1-vscale*vscale));
int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
for (int i = start; i < end; i++) {
if (inverseMasses[i] != 0.0) {
RealOpenMM sqrtInvMass = SQRT(inverseMasses[i]);
RealVec noise(random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex));
double sqrtInvMass = sqrt(inverseMasses[i]);
Vec3 noise(random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex));
velocities[i] = velocities[i]*vscale + forces[i]*(fscale*inverseMasses[i]) + noise*(noisescale*sqrtInvMass);
}
}
}
void CpuLangevinDynamics::threadUpdate2(int threadIndex) {
const RealOpenMM dt = getDeltaT();
const double dt = getDeltaT();
int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
for (int i = start; i < end; i++) {
if (inverseMasses[i] != 0.0) {
RealOpenMM sqrtInvMass = SQRT(inverseMasses[i]);
double sqrtInvMass = sqrt(inverseMasses[i]);
xPrime[i] = atomCoordinates[i]+velocities[i]*dt;
}
}
}
void CpuLangevinDynamics::threadUpdate3(int threadIndex) {
const RealOpenMM invStepSize = 1.0/getDeltaT();
const double invStepSize = 1.0/getDeltaT();
int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -59,7 +59,7 @@ public:
*/
class CpuNeighborList::Voxels {
public:
Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) :
Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const Vec3* boxVectors, bool usePeriodic) :
blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
......@@ -409,21 +409,11 @@ private:
vector<vector<vector<pair<float, int> > > > bins;
};
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
public:
ThreadTask(CpuNeighborList& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeNeighborList(threads, threadIndex);
}
CpuNeighborList& owner;
};
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
}
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
const RealVec* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
int numBlocks = (numAtoms+blockSize-1)/blockSize;
blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks);
......@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Sort the atoms based on a Hilbert curve.
atomBins.resize(numAtoms);
ThreadTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
threads.waitForThreads();
sort(atomBins.begin(), atomBins.end());
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -30,6 +30,7 @@
#include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h"
#include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
......@@ -41,23 +42,14 @@ using namespace OpenMM;
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
class CpuNonbondedForce::ComputeDirectTask : public ThreadPool::Task {
public:
ComputeDirectTask(CpuNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeDirect(threads, threadIndex);
}
CpuNonbondedForce& owner;
};
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) {
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
}
CpuNonbondedForce::~CpuNonbondedForce() {
......@@ -78,10 +70,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid = false;
cutoff = true;
cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
}
if(alphaDispersionEwald != 0.0f){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
/**---------------------------------------------------------------------------------------
......@@ -96,7 +99,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance = distance;
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
......@@ -106,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
void CpuNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
......@@ -124,11 +127,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
}
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
......@@ -139,18 +142,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
numRx = kmaxx;
numRy = kmaxy;
numRz = kmaxz;
ewald = true;
tabulateEwaldScaleFactor();
}
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
numRx = kmaxx;
numRy = kmaxy;
numRz = kmaxz;
ewald = true;
tabulateEwaldScaleFactor();
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
......@@ -159,19 +162,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2];
pme = true;
tabulateEwaldScaleFactor();
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2];
pme = true;
tabulateEwaldScaleFactor();
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
if (alpha != alphaDispersionEwald)
expTableIsValid = false;
alphaDispersionEwald = alpha;
dispersionMeshDim[0] = meshSize[0];
dispersionMeshDim[1] = meshSize[1];
dispersionMeshDim[2] = meshSize[2];
ljpme = true;
tabulateExpTerms();
if(cutoffDistance != 0.0f){
// We set this here, in case setUseLJPME is called after the cutoff is set
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid)
return;
tableIsValid = true;
......@@ -187,10 +220,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
vector<RealVec>& forces, double* totalEnergy) const {
void CpuNonbondedForce::tabulateExpTerms() {
if (expTableIsValid)
return;
expTableIsValid = true;
exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
exptermsDXInv = 1.0f/exptermsDX;
exptermsTable.resize(NUM_TABLE_POINTS+4);
dExptermsTable.resize(NUM_TABLE_POINTS+4);
for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
double r = i*ewaldDX;
double dalphaR = alphaDispersionEwald*r;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
exptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
vector<Vec3>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex;
static const float epsilon = 1.0;
......@@ -203,14 +256,37 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if (pme) {
pme_t pmedata;
pme_init(&pmedata, alphaEwald, numberOfAtoms, meshDim, 5, 1);
vector<RealOpenMM> charges(numberOfAtoms);
vector<double> charges(numberOfAtoms);
for (int i = 0; i < numberOfAtoms; i++)
charges[i] = posq[4*i+3];
RealOpenMM recipEnergy = 0.0;
double recipEnergy = 0.0;
pme_exec(pmedata, atomCoordinates, forces, charges, periodicBoxVectors, &recipEnergy);
if (totalEnergy)
*totalEnergy += recipEnergy;
pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<Vec3> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = C6params[i];
dpmeforces.push_back(Vec3());
}
double recipDispersionEnergy = 0.0;
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
pme_destroy(pmedata);
}
}
// Ewald method
......@@ -224,7 +300,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms);
......@@ -232,15 +308,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
for (int i = 0; (i < numberOfAtoms); i++) {
float* pos = posq+4*i;
for (int m = 0; (m < 3); m++)
EIR(0, i, m) = d_complex(1,0);
EIR(0, i, m) = d_complex(1,0);
for (int m=0; (m<3); m++)
EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
sin(pos[m]*recipBoxSize[m]));
EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
sin(pos[m]*recipBoxSize[m]));
for (int j=2; (j<kmax); j++)
for (int m=0; (m<3); m++)
EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
for (int m=0; (m<3); m++)
EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
}
// calculate reciprocal space energy and forces
......@@ -254,11 +330,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
float ky = ry * recipBoxSize[1];
if (ry >= 0) {
for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
}
else {
for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
}
for (int rz = lowrz; rz < numRz; rz++) {
if (rz >= 0) {
......@@ -300,14 +376,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
}
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
this->posq = posq;
this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0];
this->C6params = &C6params[0];
this->exclusions = &exclusions[0];
this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL);
......@@ -318,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
threads.waitForThreads();
// Signal the threads to subtract the exclusions.
......@@ -350,9 +426,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) {
if (ewald || pme || ljpme) {
// Compute the interactions from the neighbor list.
while (true) {
int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (nextBlock >= neighborList->getNumBlocks())
......@@ -370,7 +445,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
break;
int end = min(start+groupSize, numberOfAtoms);
for (int i = start; i < end; i++) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) {
......@@ -394,7 +469,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
}
else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
if (ljpme) {
float C6ij = C6params[i]*C6params[j];
float inverseR2 = 1.0f/r2;
float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
if(includeEnergy)
threadEnergy[threadIndex] += emult;
float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
}
}
}
}
......@@ -444,7 +530,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
}
float sig = atomParameters[ii].first + atomParameters[jj].first;
float sig2 = inverseR*sig;
sig2 *= sig2;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
float eps = atomParameters[ii].second*atomParameters[jj].second;
......@@ -476,7 +562,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj);
}
}
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI;
......@@ -502,3 +588,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
}
float CpuNonbondedForce::exptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*exptermsTable[index] + coeff2*exptermsTable[index+1];
}
float CpuNonbondedForce::dExptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*dExptermsTable[index] + coeff2*dExptermsTable[index+1];
}
......@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
#include <algorithm>
#include <iostream>
using namespace std;
using namespace OpenMM;
......@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
......@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
......@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
fvec4 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block.
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
......@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 sig2 = inverseR*sig;
sig2 *= sig2;
fvec4 sig6 = sig2*sig2*sig2;
fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
fvec4 eps = blockAtomEpsilon*atomEpsilon;
fvec4 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
......@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue;
}
if (ljpme) {
fvec4 C6ij = C6s*C6params[atom];
fvec4 inverseR2 = inverseR*inverseR;
fvec4 mysig2 = sig*sig;
fvec4 mysig6 = mysig2*mysig2*mysig2;
fvec4 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec4 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
}
else {
energy = 0.0f;
......@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
}
// Record the forces on the block atoms.
fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
transpose(f[0], f[1], f[2], f[3]);
for (int j = 0; j < 4; j++)
......@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::exptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&exptermsTable[index[0]]);
fvec4 t2(&exptermsTable[index[1]]);
fvec4 t3(&exptermsTable[index[2]]);
fvec4 t4(&exptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::dExptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&dExptermsTable[index[0]]);
fvec4 t2(&dExptermsTable[index[1]]);
fvec4 t3(&dExptermsTable[index[2]]);
fvec4 t4(&dExptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
......@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <iostream>
using namespace std;
using namespace OpenMM;
......@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
......@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
fvec8 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]], C6params[blockAtom[4]], C6params[blockAtom[5]], C6params[blockAtom[6]], C6params[blockAtom[7]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
......@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8 sig2 = inverseR*sig;
sig2 *= sig2;
fvec8 sig6 = sig2*sig2*sig2;
fvec8 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
fvec8 eps = blockAtomEpsilon*atomEpsilon;
fvec8 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
......@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue;
}
if (ljpme) {
fvec8 C6ij = C6s*C6params[atom];
fvec8 inverseR2 = inverseR*inverseR;
fvec8 mysig2 = sig*sig;
fvec8 mysig6 = mysig2*mysig2*mysig2;
fvec8 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec8 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
}
else {
energy = 0.0f;
......@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::exptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&exptermsTable[indexLower[0]]);
fvec4 t2(&exptermsTable[indexLower[1]]);
fvec4 t3(&exptermsTable[indexLower[2]]);
fvec4 t4(&exptermsTable[indexLower[3]]);
fvec4 t5(&exptermsTable[indexUpper[0]]);
fvec4 t6(&exptermsTable[indexUpper[1]]);
fvec4 t7(&exptermsTable[indexUpper[2]]);
fvec4 t8(&exptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::dExptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&dExptermsTable[indexLower[0]]);
fvec4 t2(&dExptermsTable[indexLower[1]]);
fvec4 t3(&dExptermsTable[indexLower[2]]);
fvec4 t4(&dExptermsTable[indexLower[3]]);
fvec4 t5(&dExptermsTable[indexUpper[0]]);
fvec4 t6(&dExptermsTable[indexUpper[1]]);
fvec4 t7(&dExptermsTable[indexUpper[2]]);
fvec4 t8(&dExptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
#endif
......@@ -127,6 +127,8 @@ void CpuPlatform::contextDestroyed(ContextImpl& context) const {
PlatformData* data = contextData[&context];
delete data;
contextData.erase(&context);
ReferencePlatform::PlatformData* refPlatformData = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
delete refPlatformData;
}
CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -35,56 +35,10 @@
using namespace OpenMM;
using namespace std;
class CpuSETTLE::ApplyToPositionsTask : public ThreadPool::Task {
public:
ApplyToPositionsTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses,
RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), atomCoordinatesP(atomCoordinatesP),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
}
vector<OpenMM::RealVec>& atomCoordinates;
vector<OpenMM::RealVec>& atomCoordinatesP;
vector<RealOpenMM>& inverseMasses;
RealOpenMM tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
class CpuSETTLE::ApplyToVelocitiesTask : public ThreadPool::Task {
public:
ApplyToVelocitiesTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses,
RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), velocities(velocities),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
}
vector<OpenMM::RealVec>& atomCoordinates;
vector<OpenMM::RealVec>& velocities;
vector<RealOpenMM>& inverseMasses;
RealOpenMM tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) {
int numBlocks = 10*threads.getNumThreads();
int numClusters = settle.getNumClusters();
vector<RealOpenMM> mass(system.getNumParticles());
vector<double> mass(system.getNumParticles());
for (int i = 0; i < system.getNumParticles(); i++)
mass[i] = system.getParticleMass(i);
for (int i = 0; i < numBlocks; i++) {
......@@ -93,7 +47,7 @@ CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settl
if (start != end) {
int numThreadClusters = end-start;
vector<int> atom1(numThreadClusters), atom2(numThreadClusters), atom3(numThreadClusters);
vector<RealOpenMM> distance1(numThreadClusters), distance2(numThreadClusters);
vector<double> distance1(numThreadClusters), distance2(numThreadClusters);
for (int j = 0; j < numThreadClusters; j++)
settle.getClusterParameters(start+j, atom1[j], atom2[j], atom3[j], distance1[j], distance2[j]);
threadSettle.push_back(new ReferenceSETTLEAlgorithm(atom1, atom2, atom3, distance1, distance2, mass));
......@@ -106,14 +60,30 @@ CpuSETTLE::~CpuSETTLE() {
delete threadSettle[i];
}
void CpuSETTLE::apply(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) {
ApplyToPositionsTask task(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance, threadSettle);
threads.execute(task);
void CpuSETTLE::apply(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& atomCoordinatesP, vector<double>& inverseMasses, double tolerance) {
gmx_atomic_t atomicCounter;
gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
});
threads.waitForThreads();
}
void CpuSETTLE::applyToVelocities(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) {
ApplyToVelocitiesTask task(atomCoordinates, velocities, inverseMasses, tolerance, threadSettle);
threads.execute(task);
void CpuSETTLE::applyToVelocities(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& velocities, vector<double>& inverseMasses, double tolerance) {
gmx_atomic_t atomicCounter;
gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
});
threads.waitForThreads();
}
......@@ -16,7 +16,6 @@ ENDFOREACH(file)
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB_STATIC})
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
......@@ -51,16 +51,16 @@ using namespace std;
void testNeighborList(bool periodic, bool triclinic) {
const int numParticles = 500;
const float cutoff = 2.0f;
RealVec boxVectors[3];
Vec3 boxVectors[3];
if (triclinic) {
boxVectors[0] = RealVec(10, 0, 0);
boxVectors[1] = RealVec(4, 9, 0);
boxVectors[2] = RealVec(-3, -3.5, 11);
boxVectors[0] = Vec3(10, 0, 0);
boxVectors[1] = Vec3(4, 9, 0);
boxVectors[2] = Vec3(-3, -3.5, 11);
}
else {
boxVectors[0] = RealVec(10, 0, 0);
boxVectors[1] = RealVec(0, 9, 0);
boxVectors[2] = RealVec(0, 0, 11);
boxVectors[0] = Vec3(10, 0, 0);
boxVectors[1] = Vec3(0, 9, 0);
boxVectors[2] = Vec3(0, 0, 11);
}
const float boxSize[3] = {(float) boxVectors[0][0], (float) boxVectors[1][1], (float) boxVectors[2][2]};
const int blockSize = 8;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -494,6 +494,10 @@ public:
CudaNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded;
}
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/**
* Get the thread used by this context for executing parallel computations.
*/
......@@ -577,6 +581,12 @@ public:
* and order to be revalidated.
*/
void invalidateMolecules();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool invalidateMolecules(CudaForceInfo* force);
private:
/**
* Compute a sorted list of device indices in decreasing order of desirability
......@@ -626,6 +636,7 @@ private:
CUfunction clearFourBuffersKernel;
CUfunction clearFiveBuffersKernel;
CUfunction clearSixBuffersKernel;
CUfunction setChargesKernel;
std::vector<CudaForceInfo*> forces;
std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups;
......@@ -638,6 +649,7 @@ private:
CudaArray* energyBuffer;
CudaArray* energyParamDerivBuffer;
CudaArray* atomIndexDevice;
CudaArray* chargeBuffer;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -198,7 +198,6 @@ public:
*/
void loadCheckpoint(ContextImpl& context, std::istream& stream);
private:
class GetPositionsTask;
CudaContext& cu;
};
......@@ -292,9 +291,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private:
class ForceInfo;
int numBonds;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
};
......@@ -332,9 +333,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private:
class ForceInfo;
int numBonds;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -375,9 +378,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private:
class ForceInfo;
int numAngles;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
};
......@@ -415,9 +420,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private:
class ForceInfo;
int numAngles;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -458,9 +465,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
};
......@@ -498,9 +507,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params1;
CudaArray* params2;
......@@ -539,9 +550,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
std::vector<int2> mapPositionsVec;
CudaArray* coefficients;
......@@ -582,9 +595,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -599,7 +614,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) {
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL), pmeDispersionBsplineModuliY(NULL),
pmeDispersionBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
}
~CudaCalcNonbondedForceKernel();
/**
......@@ -636,6 +652,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class SortTrait : public CudaSort::SortTrait {
int getDataSize() const {return 8;}
......@@ -647,12 +672,14 @@ private:
const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";}
const char* getSortKey() const {return "value.y";}
};
class ForceInfo;
class PmeIO;
class PmePreComputation;
class PmePostComputation;
class SyncStreamPreComputation;
class SyncStreamPostComputation;
CudaContext& cu;
ForceInfo* info;
bool hasInitializedFFT;
CudaArray* sigmaEpsilon;
CudaArray* exceptionParams;
......@@ -662,6 +689,9 @@ private:
CudaArray* pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ;
CudaArray* pmeDispersionBsplineModuliX;
CudaArray* pmeDispersionBsplineModuliY;
CudaArray* pmeDispersionBsplineModuliZ;
CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex;
CudaArray* pmeEnergyBuffer;
......@@ -673,20 +703,29 @@ private:
CudaFFT3D* fft;
cufftHandle fftForward;
cufftHandle fftBackward;
CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward;
CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel;
CUfunction pmeDispersionGridIndexKernel;
CUfunction pmeSpreadChargeKernel;
CUfunction pmeDispersionSpreadChargeKernel;
CUfunction pmeFinishSpreadChargeKernel;
CUfunction pmeDispersionFinishSpreadChargeKernel;
CUfunction pmeEvalEnergyKernel;
CUfunction pmeEvalDispersionEnergyKernel;
CUfunction pmeConvolutionKernel;
CUfunction pmeDispersionConvolutionKernel;
CUfunction pmeInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines;
CUfunction pmeInterpolateDispersionForceKernel;
std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha;
double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME;
NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5;
};
......@@ -724,8 +763,10 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private:
class ForceInfo;
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
CudaArray* interactionGroupData;
......@@ -775,10 +816,12 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
class ForceInfo;
double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels;
int maxTiles;
CudaContext& cu;
ForceInfo* info;
CudaArray* params;
CudaArray* bornSum;
CudaArray* bornRadii;
......@@ -825,10 +868,12 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
class ForceInfo;
double cutoff;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaParameterSet* computedValues;
CudaParameterSet* energyDerivs;
......@@ -882,9 +927,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private:
class ForceInfo;
int numParticles;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -926,9 +973,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
class ForceInfo;
int numDonors, numAcceptors;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* donorParams;
CudaParameterSet* acceptorParams;
CudaArray* globals;
......@@ -978,9 +1027,11 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
private:
class ForceInfo;
int numGroups, numBonds;
bool needEnergyParamDerivs;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
CudaArray* groupParticles;
......@@ -1031,8 +1082,10 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private:
class ForceInfo;
int numBonds;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
std::vector<std::string> globalParamNames;
......@@ -1077,7 +1130,9 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force);
private:
class ForceInfo;
CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernel;
NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
......@@ -1139,9 +1194,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private:
class ForceInfo;
class ReorderListener;
void sortAtoms();
CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernels;
int numRealParticles, numExceptions, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod;
......@@ -1432,7 +1489,7 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target);
void recordGlobalValue(double value, GlobalTarget target, CustomIntegrator& integrator);
void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step);
CudaContext& cu;
......
......@@ -71,15 +71,16 @@ public:
/**
* Add a nonbonded interaction to be evaluated by the default interaction kernel.
*
* @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param usesExclusions specifies whether this interaction uses exclusions. If this is true, it must have identical exclusions to every other interaction.
* @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param usesExclusions specifies whether this interaction uses exclusions. If this is true, it must have identical exclusions to every other interaction.
* @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param supportsPairList specifies whether this interaction can work with a neighbor list that uses a separate pair list
*/
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup);
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool supportsPairList=false);
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*/
......@@ -189,6 +190,12 @@ public:
CudaArray& getInteractingAtoms() {
return *interactingAtoms;
}
/**
* Get the array containing single pairs in the neighbor list.
*/
CudaArray& getSinglePairs() {
return *singlePairs;
}
/**
* Get the array containing exclusion flags.
*/
......@@ -270,6 +277,8 @@ private:
CudaArray* interactingTiles;
CudaArray* interactingAtoms;
CudaArray* interactionCount;
CudaArray* singlePairs;
CudaArray* singlePairCount;
CudaArray* blockCenter;
CudaArray* blockBoundingBox;
CudaArray* sortedBlocks;
......@@ -288,8 +297,8 @@ private:
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
};
/**
......
......@@ -83,7 +83,7 @@ private:
std::vector<Kernel> kernels;
std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions;
int* tileCounts;
int2* interactionCounts;
CudaArray* contextForces;
void* pinnedPositionBuffer;
long long* pinnedForceBuffer;
......@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class Task;
CudaPlatform::PlatformData& data;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -52,6 +52,7 @@
#include <set>
#include <sstream>
#include <typeinfo>
#include <sys/stat.h>
#include <cudaProfiler.h>
#ifndef WIN32
#include <unistd.h>
......@@ -107,7 +108,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), chargeBuffer(NULL),
integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
// Determine what compiler to use.
this->compiler = "\""+compiler+"\"";
......@@ -127,9 +129,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
int res = std::system(testCompilerCommand.c_str());
#endif
isNvccAvailable = (res == 0);
struct stat info;
isNvccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0);
int cudaDriverVersion;
cuDriverGetVersion(&cudaDriverVersion);
static bool hasShownNvccWarning = false;
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning) {
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning && cudaDriverVersion < 8000) {
hasShownNvccWarning = true;
printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. ");
#ifdef WIN32
......@@ -205,14 +210,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
#if __CUDA_API_VERSION < 7000
int numThreadBlocksPerComputeUnit = (major >= 6 ? 4 : 6);
if (cudaDriverVersion < 7000) {
// This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support
// anything beyond 5.0.
if (major == 5)
minor = 0;
#endif
#if __CUDA_API_VERSION < 8000
}
if (cudaDriverVersion < 8000) {
// This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3.
......@@ -220,7 +226,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
major = 5;
minor = 3;
}
#endif
}
gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor;
......@@ -241,7 +247,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
......@@ -287,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers");
clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers");
clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers");
setChargesKernel = getKernel(utilities, "setCharges");
// Set defines based on the requested precision.
......@@ -403,6 +409,8 @@ CudaContext::~CudaContext() {
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (chargeBuffer != NULL)
delete chargeBuffer;
if (integration != NULL)
delete integration;
if (expression != NULL)
......@@ -856,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() {
}
}
void CudaContext::setCharges(const vector<double>& charges) {
if (chargeBuffer == NULL)
chargeBuffer = new CudaArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = charges[i];
chargeBuffer->upload(c);
}
else {
float* c = (float*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = (float) charges[i];
chargeBuffer->upload(c);
}
void* args[] = {&chargeBuffer->getDevicePointer(), &posq->getDevicePointer(), &atomIndexDevice->getDevicePointer(), &numAtoms};
executeKernel(setChargesKernel, args, numAtoms);
}
/**
* This class ensures that atom reordering doesn't break virtual sites.
*/
......@@ -1054,9 +1081,19 @@ void CudaContext::findMoleculeGroups() {
}
void CudaContext::invalidateMolecules() {
for (int i = 0; i < forces.size(); i++)
if (invalidateMolecules(forces[i]))
return;
}
bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return;
return false;
bool valid = true;
int forceIndex = -1;
for (int i = 0; i < forces.size(); i++)
if (forces[i] == force)
forceIndex = i;
for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
MoleculeGroup& mol = moleculeGroups[group];
vector<int>& instances = mol.instances;
......@@ -1071,22 +1108,21 @@ void CudaContext::invalidateMolecules() {
Molecule& m2 = molecules[instances[j]];
int offset2 = offsets[j];
for (int i = 0; i < (int) atoms.size() && valid; i++) {
for (int k = 0; k < (int) forces.size(); k++)
if (!forces[k]->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
valid = false;
if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
valid = false;
}
// See if the force groups are identical.
for (int i = 0; i < (int) forces.size() && valid; i++) {
for (int k = 0; k < (int) m1.groups[i].size() && valid; k++)
if (!forces[i]->areGroupsIdentical(m1.groups[i][k], m2.groups[i][k]))
if (valid && forceIndex > -1) {
for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
valid = false;
}
}
}
if (valid)
return;
return false;
// The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them
......@@ -1154,6 +1190,7 @@ void CudaContext::invalidateMolecules() {
for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute();
reorderAtoms();
return true;
}
void CudaContext::reorderAtoms() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment