Commit 047934e2 authored by Rafal P. Wiewiora's avatar Rafal P. Wiewiora
Browse files

Merge remote-tracking branch 'upstream/master'

parents ce3a5dc0 d12c9bd1
/* Portions copyright (c) 2009-2014 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -37,16 +37,6 @@ ...@@ -37,16 +37,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomManyParticleForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomManyParticleForce& owner;
};
CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) : CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) { threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
numParticles = force.getNumParticles(); numParticles = force.getNumParticles();
...@@ -98,7 +88,7 @@ CpuCustomManyParticleForce::~CpuCustomManyParticleForce() { ...@@ -98,7 +88,7 @@ CpuCustomManyParticleForce::~CpuCustomManyParticleForce() {
delete threadData[i]; delete threadData[i];
} }
void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpenMM** particleParameters, void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, double** particleParameters,
const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce, const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
bool includeForces, bool includeEnergy, double& energy) { bool includeForces, bool includeEnergy, double& energy) {
// Record the parameters for the threads. // Record the parameters for the threads.
...@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpe ...@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, RealOpe
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Combine the energies from all the threads. // Combine the energies from all the threads.
...@@ -191,14 +180,14 @@ void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int thr ...@@ -191,14 +180,14 @@ void CpuCustomManyParticleForce::threadComputeForce(ThreadPool& threads, int thr
} }
} }
void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) { void CpuCustomManyParticleForce::setUseCutoff(double distance) {
useCutoff = true; useCutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
if (neighborList == NULL) if (neighborList == NULL)
neighborList = new CpuNeighborList(4); neighborList = new CpuNeighborList(4);
} }
void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) { void CpuCustomManyParticleForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(useCutoff); assert(useCutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance); assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
...@@ -220,7 +209,7 @@ void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) { ...@@ -220,7 +209,7 @@ void CpuCustomManyParticleForce::setPeriodic(RealVec* periodicBoxVectors) {
} }
void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availableParticles, vector<int>& particleSet, int loopIndex, int startIndex, void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availableParticles, vector<int>& particleSet, int loopIndex, int startIndex,
RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) { double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
int numParticles = availableParticles.size(); int numParticles = availableParticles.size();
double cutoff2 = cutoffDistance*cutoffDistance; double cutoff2 = cutoffDistance*cutoffDistance;
int checkRange = (centralParticleMode ? 1 : loopIndex); int checkRange = (centralParticleMode ? 1 : loopIndex);
...@@ -254,7 +243,7 @@ void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availablePart ...@@ -254,7 +243,7 @@ void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availablePart
} }
} }
void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, RealOpenMM** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
// Select the ordering to use for the particles. // Select the ordering to use for the particles.
vector<int>& permutedParticles = data.permutedParticles; vector<int>& permutedParticles = data.permutedParticles;
......
/* Portions copyright (c) 2009-2016 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -33,16 +33,6 @@ ...@@ -33,16 +33,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomNonbondedForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomNonbondedForce& owner;
};
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) : const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) { energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
...@@ -70,7 +60,7 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression ...@@ -70,7 +60,7 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression, CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions, const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) : const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) :
cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) { cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++) for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions)); threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions));
} }
...@@ -80,13 +70,14 @@ CpuCustomNonbondedForce::~CpuCustomNonbondedForce() { ...@@ -80,13 +70,14 @@ CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
delete threadData[i]; delete threadData[i];
} }
void CpuCustomNonbondedForce::setUseCutoff(RealOpenMM distance, const CpuNeighborList& neighbors) { void CpuCustomNonbondedForce::setUseCutoff(double distance, const CpuNeighborList& neighbors) {
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
neighborList = &neighbors; neighborList = &neighbors;
} }
void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) { void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) {
useInteractionGroups = true;
for (int group = 0; group < (int) groups.size(); group++) { for (int group = 0; group < (int) groups.size(); group++) {
const set<int>& set1 = groups[group].first; const set<int>& set1 = groups[group].first;
const set<int>& set2 = groups[group].second; const set<int>& set2 = groups[group].second;
...@@ -102,12 +93,12 @@ void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, s ...@@ -102,12 +93,12 @@ void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, s
} }
} }
void CpuCustomNonbondedForce::setUseSwitchingFunction(RealOpenMM distance) { void CpuCustomNonbondedForce::setUseSwitchingFunction(double distance) {
useSwitch = true; useSwitch = true;
switchingDistance = distance; switchingDistance = distance;
} }
void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) { void CpuCustomNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(cutoff); assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance); assert(periodicBoxVectors[1][1] >= 2.0*cutoffDistance);
...@@ -129,9 +120,9 @@ void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) { ...@@ -129,9 +120,9 @@ void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
} }
void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates, RealOpenMM** atomParameters, void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<Vec3>& atomCoordinates, double** atomParameters,
RealOpenMM* fixedParameters, const map<string, double>& globalParameters, double* fixedParameters, const map<string, double>& globalParameters,
vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) { vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
...@@ -149,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v ...@@ -149,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Combine the energies from all the threads. // Combine the energies from all the threads.
...@@ -183,7 +173,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -183,7 +173,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
data.energyParamDerivs[i] = 0.0; data.energyParamDerivs[i] = 0.0;
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (groupInteractions.size() > 0) { if (useInteractionGroups) {
// The user has specified interaction groups, so compute only the requested interactions. // The user has specified interaction groups, so compute only the requested interactions.
while (true) { while (true) {
......
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -37,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096; ...@@ -37,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
const float CpuGBSAOBCForce::TABLE_MIN = 0.25f; const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
const float CpuGBSAOBCForce::TABLE_MAX = 1.5f; const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuGBSAOBCForce& owner;
};
CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) { CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS; logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
logDXInv = 1.0f/logDX; logDXInv = 1.0f/logDX;
...@@ -89,6 +78,10 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f ...@@ -89,6 +78,10 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f
particleParams = params; particleParams = params;
bornRadii.resize(params.size()+3); bornRadii.resize(params.size()+3);
obcChain.resize(params.size()+3); obcChain.resize(params.size()+3);
for (int i = bornRadii.size()-3; i < bornRadii.size(); i++) {
bornRadii[i] = 0;
obcChain[i] = 0;
}
} }
void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) { void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
...@@ -107,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align ...@@ -107,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeTask task(*this);
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.execute(task); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads(); // Compute Born radii threads.waitForThreads(); // Compute Born radii
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.resumeThreads(); threads.resumeThreads();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2016 Stanford University and the Authors. * * Portions copyright (c) 2016-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -44,17 +44,6 @@ ...@@ -44,17 +44,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuGayBerneForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGayBerneForce& owner, CpuNeighborList* neighborList) : owner(owner), neighborList(neighborList) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex, neighborList);
}
CpuGayBerneForce& owner;
CpuNeighborList* neighborList;
};
CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) { CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
// Record the force parameters. // Record the force parameters.
...@@ -111,7 +100,7 @@ const vector<set<int> >& CpuGayBerneForce::getExclusions() const { ...@@ -111,7 +100,7 @@ const vector<set<int> >& CpuGayBerneForce::getExclusions() const {
return particleExclusions; return particleExclusions;
} }
RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, std::vector<RealVec>& forces, std::vector<AlignedArray<float> >& threadForce, RealVec* boxVectors, CpuPlatform::PlatformData& data) { double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vector<Vec3>& forces, std::vector<AlignedArray<float> >& threadForce, Vec3* boxVectors, CpuPlatform::PlatformData& data) {
if (nonbondedMethod == GayBerneForce::CutoffPeriodic) { if (nonbondedMethod == GayBerneForce::CutoffPeriodic) {
double minAllowedSize = 1.999999*cutoffDistance; double minAllowedSize = 1.999999*cutoffDistance;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
...@@ -137,8 +126,7 @@ RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, st ...@@ -137,8 +126,7 @@ RealOpenMM CpuGayBerneForce::calculateForce(const vector<RealVec>& positions, st
// Signal the threads to compute the pairwise interactions. // Signal the threads to compute the pairwise interactions.
ComputeTask task(*this, data.neighborList); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex, data.neighborList); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to compute exceptions. // Signal the threads to compute exceptions.
...@@ -164,10 +152,10 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex, ...@@ -164,10 +152,10 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
int numThreads = threads.getNumThreads(); int numThreads = threads.getNumThreads();
threadEnergy[threadIndex] = 0; threadEnergy[threadIndex] = 0;
float* forces = &(*threadForce)[threadIndex][0]; float* forces = &(*threadForce)[threadIndex][0];
vector<RealVec>& torques = threadTorque[threadIndex]; vector<Vec3>& torques = threadTorque[threadIndex];
torques.resize(numParticles); torques.resize(numParticles);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
torques[i] = RealVec(); torques[i] = Vec3();
double energy = 0.0; double energy = 0.0;
// Compute this thread's subset of interactions. // Compute this thread's subset of interactions.
...@@ -184,8 +172,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex, ...@@ -184,8 +172,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
continue; continue;
if (particleExclusions[i].find(j) != particleExclusions[i].end()) if (particleExclusions[i].find(j) != particleExclusions[i].end())
continue; // This interaction will be handled by an exception. continue; // This interaction will be handled by an exception.
RealOpenMM sigma = particles[i].sigmaOver2+particles[j].sigmaOver2; double sigma = particles[i].sigmaOver2+particles[j].sigmaOver2;
RealOpenMM epsilon = particles[i].sqrtEpsilon*particles[j].sqrtEpsilon; double epsilon = particles[i].sqrtEpsilon*particles[j].sqrtEpsilon;
energy += computeOneInteraction(i, j, sigma, epsilon, positions, forces, torques, boxVectors); energy += computeOneInteraction(i, j, sigma, epsilon, positions, forces, torques, boxVectors);
} }
} }
...@@ -208,8 +196,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex, ...@@ -208,8 +196,8 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
int second = blockAtom[k]; int second = blockAtom[k];
if (particles[second].sqrtEpsilon == 0.0f) if (particles[second].sqrtEpsilon == 0.0f)
continue; continue;
RealOpenMM sigma = particles[first].sigmaOver2+particles[second].sigmaOver2; double sigma = particles[first].sigmaOver2+particles[second].sigmaOver2;
RealOpenMM epsilon = particles[first].sqrtEpsilon*particles[second].sqrtEpsilon; double epsilon = particles[first].sqrtEpsilon*particles[second].sqrtEpsilon;
energy += computeOneInteraction(first, second, sigma, epsilon, positions, forces, torques, boxVectors); energy += computeOneInteraction(first, second, sigma, epsilon, positions, forces, torques, boxVectors);
} }
} }
...@@ -235,39 +223,39 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex, ...@@ -235,39 +223,39 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
threadEnergy[threadIndex] = energy; threadEnergy[threadIndex] = energy;
} }
void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions) { void CpuGayBerneForce::computeEllipsoidFrames(const vector<Vec3>& positions) {
int numParticles = particles.size(); int numParticles = particles.size();
for (int particle = 0; particle < numParticles; particle++) { for (int particle = 0; particle < numParticles; particle++) {
ParticleInfo& p = particles[particle]; ParticleInfo& p = particles[particle];
// Compute the local coordinate system of the ellipsoid; // Compute the local coordinate system of the ellipsoid;
RealVec xdir, ydir, zdir; Vec3 xdir, ydir, zdir;
if (p.xparticle == -1) { if (p.xparticle == -1) {
xdir = RealVec(1, 0, 0); xdir = Vec3(1, 0, 0);
ydir = RealVec(0, 1, 0); ydir = Vec3(0, 1, 0);
} }
else { else {
xdir = positions[particle]-positions[p.xparticle]; xdir = positions[particle]-positions[p.xparticle];
xdir /= SQRT(xdir.dot(xdir)); xdir /= sqrt(xdir.dot(xdir));
if (p.yparticle == -1) { if (p.yparticle == -1) {
if (xdir[1] > -0.5 && xdir[1] < 0.5) if (xdir[1] > -0.5 && xdir[1] < 0.5)
ydir = RealVec(0, 1, 0); ydir = Vec3(0, 1, 0);
else else
ydir = RealVec(1, 0, 0); ydir = Vec3(1, 0, 0);
} }
else else
ydir = positions[particle]-positions[p.yparticle]; ydir = positions[particle]-positions[p.yparticle];
ydir -= xdir*(xdir.dot(ydir)); ydir -= xdir*(xdir.dot(ydir));
ydir /= SQRT(ydir.dot(ydir)); ydir /= sqrt(ydir.dot(ydir));
} }
zdir = xdir.cross(ydir); zdir = xdir.cross(ydir);
// Compute matrices we will need later. // Compute matrices we will need later.
RealOpenMM (&a)[3][3] = A[particle].v; double (&a)[3][3] = A[particle].v;
RealOpenMM (&b)[3][3] = B[particle].v; double (&b)[3][3] = B[particle].v;
RealOpenMM (&g)[3][3] = G[particle].v; double (&g)[3][3] = G[particle].v;
a[0][0] = xdir[0]; a[0][0] = xdir[0];
a[0][1] = xdir[1]; a[0][1] = xdir[1];
a[0][2] = xdir[2]; a[0][2] = xdir[2];
...@@ -277,8 +265,8 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions) ...@@ -277,8 +265,8 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions)
a[2][0] = zdir[0]; a[2][0] = zdir[0];
a[2][1] = zdir[1]; a[2][1] = zdir[1];
a[2][2] = zdir[2]; a[2][2] = zdir[2];
RealVec r2(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz); Vec3 r2(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz);
RealVec e2(1/sqrt(p.ex), 1/sqrt(p.ey), 1/sqrt(p.ez)); Vec3 e2(1/sqrt(p.ex), 1/sqrt(p.ey), 1/sqrt(p.ez));
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) { for (int j = 0; j < 3; j++) {
b[i][j] = 0; b[i][j] = 0;
...@@ -291,33 +279,33 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions) ...@@ -291,33 +279,33 @@ void CpuGayBerneForce::computeEllipsoidFrames(const vector<RealVec>& positions)
} }
} }
void CpuGayBerneForce::applyTorques(const vector<RealVec>& positions, vector<RealVec>& forces) { void CpuGayBerneForce::applyTorques(const vector<Vec3>& positions, vector<Vec3>& forces) {
int numParticles = particles.size(); int numParticles = particles.size();
int numThreads = threadTorque.size(); int numThreads = threadTorque.size();
for (int particle = 0; particle < numParticles; particle++) { for (int particle = 0; particle < numParticles; particle++) {
ParticleInfo& p = particles[particle]; ParticleInfo& p = particles[particle];
RealVec pos = positions[particle]; Vec3 pos = positions[particle];
if (p.xparticle != -1) { if (p.xparticle != -1) {
// Add up the torques from the individual threads. // Add up the torques from the individual threads.
RealVec torque; Vec3 torque;
for (int i = 0; i < numThreads; i++) for (int i = 0; i < numThreads; i++)
torque += threadTorque[i][particle]; torque += threadTorque[i][particle];
// Apply a force to the x particle. // Apply a force to the x particle.
RealVec dx = positions[p.xparticle]-pos; Vec3 dx = positions[p.xparticle]-pos;
double dx2 = dx.dot(dx); double dx2 = dx.dot(dx);
RealVec f = torque.cross(dx)/dx2; Vec3 f = torque.cross(dx)/dx2;
forces[p.xparticle] += f; forces[p.xparticle] += f;
forces[particle] -= f; forces[particle] -= f;
if (p.yparticle != -1) { if (p.yparticle != -1) {
// Apply a force to the y particle. This is based on the component of the torque // Apply a force to the y particle. This is based on the component of the torque
// that was not already applied to the x particle. // that was not already applied to the x particle.
RealVec dy = positions[p.yparticle]-pos; Vec3 dy = positions[p.yparticle]-pos;
double dy2 = dy.dot(dy); double dy2 = dy.dot(dy);
RealVec torque2 = dx*(torque.dot(dx)/dx2); Vec3 torque2 = dx*(torque.dot(dx)/dx2);
f = torque2.cross(dy)/dy2; f = torque2.cross(dy)/dy2;
forces[p.yparticle] += f; forces[p.yparticle] += f;
forces[particle] -= f; forces[particle] -= f;
...@@ -326,27 +314,27 @@ void CpuGayBerneForce::applyTorques(const vector<RealVec>& positions, vector<Rea ...@@ -326,27 +314,27 @@ void CpuGayBerneForce::applyTorques(const vector<RealVec>& positions, vector<Rea
} }
} }
RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, RealOpenMM sigma, RealOpenMM epsilon, const RealVec* positions, double CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, double sigma, double epsilon, const Vec3* positions,
float* forces, vector<RealVec>& torques, const RealVec* boxVectors) { float* forces, vector<Vec3>& torques, const Vec3* boxVectors) {
// Compute the displacement and check against the cutoff. // Compute the displacement and check against the cutoff.
RealOpenMM deltaR[ReferenceForce::LastDeltaRIndex]; double deltaR[ReferenceForce::LastDeltaRIndex];
if (nonbondedMethod == GayBerneForce::CutoffPeriodic) if (nonbondedMethod == GayBerneForce::CutoffPeriodic)
ReferenceForce::getDeltaRPeriodic(positions[particle2], positions[particle1], boxVectors, deltaR); ReferenceForce::getDeltaRPeriodic(positions[particle2], positions[particle1], boxVectors, deltaR);
else else
ReferenceForce::getDeltaR(positions[particle2], positions[particle1], deltaR); ReferenceForce::getDeltaR(positions[particle2], positions[particle1], deltaR);
RealOpenMM r = deltaR[ReferenceForce::RIndex]; double r = deltaR[ReferenceForce::RIndex];
if (nonbondedMethod != GayBerneForce::NoCutoff && r >= cutoffDistance) if (nonbondedMethod != GayBerneForce::NoCutoff && r >= cutoffDistance)
return 0; return 0;
RealOpenMM rInv = 1/r; double rInv = 1/r;
RealVec dr(deltaR[ReferenceForce::XIndex], deltaR[ReferenceForce::YIndex], deltaR[ReferenceForce::ZIndex]); Vec3 dr(deltaR[ReferenceForce::XIndex], deltaR[ReferenceForce::YIndex], deltaR[ReferenceForce::ZIndex]);
RealVec drUnit = dr*rInv; Vec3 drUnit = dr*rInv;
// Compute the switching function. // Compute the switching function.
RealOpenMM switchValue = 1, switchDeriv = 0; double switchValue = 1, switchDeriv = 0;
if (useSwitchingFunction && r > switchingDistance) { if (useSwitchingFunction && r > switchingDistance) {
RealOpenMM t = (r-switchingDistance)/(cutoffDistance-switchingDistance); double t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
switchValue = 1+t*t*t*(-10+t*(15-t*6)); switchValue = 1+t*t*t*(-10+t*(15-t*6));
switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance); switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
} }
...@@ -354,11 +342,11 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, ...@@ -354,11 +342,11 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
// Interactions between two point particles can be computed more easily. // Interactions between two point particles can be computed more easily.
if (particles[particle1].isPointParticle && particles[particle2].isPointParticle) { if (particles[particle1].isPointParticle && particles[particle2].isPointParticle) {
RealOpenMM sig = sigma*rInv; double sig = sigma*rInv;
RealOpenMM sig2 = sig*sig; double sig2 = sig*sig;
RealOpenMM sig6 = sig2*sig2*sig2; double sig6 = sig2*sig2*sig2;
RealOpenMM energy = 4*epsilon*(sig6-1)*sig6; double energy = 4*epsilon*(sig6-1)*sig6;
RealVec force = drUnit*(switchValue*4*epsilon*(12*sig6 - 6)*sig6*rInv - energy*switchDeriv); Vec3 force = drUnit*(switchValue*4*epsilon*(12*sig6 - 6)*sig6*rInv - energy*switchDeriv);
forces[4*particle1] += force[0]; forces[4*particle1] += force[0];
forces[4*particle1+1] += force[1]; forces[4*particle1+1] += force[1];
forces[4*particle1+2] += force[2]; forces[4*particle1+2] += force[2];
...@@ -374,31 +362,31 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, ...@@ -374,31 +362,31 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
Matrix G12 = G[particle1]+G[particle2]; Matrix G12 = G[particle1]+G[particle2];
Matrix B12inv = B12.inverse(); Matrix B12inv = B12.inverse();
Matrix G12inv = G12.inverse(); Matrix G12inv = G12.inverse();
RealOpenMM detG12 = G12.determinant(); double detG12 = G12.determinant();
// Estimate the distance between the ellipsoids and compute the first terms needed for the energy. // Estimate the distance between the ellipsoids and compute the first terms needed for the energy.
RealOpenMM sigma12 = 1/SQRT(0.5*drUnit.dot(G12inv*drUnit)); double sigma12 = 1/sqrt(0.5*drUnit.dot(G12inv*drUnit));
RealOpenMM h12 = r - sigma12; double h12 = r - sigma12;
RealOpenMM rho = sigma/(h12+sigma); double rho = sigma/(h12+sigma);
RealOpenMM rho2 = rho*rho; double rho2 = rho*rho;
RealOpenMM rho6 = rho2*rho2*rho2; double rho6 = rho2*rho2*rho2;
RealOpenMM u = 4*epsilon*(rho6*rho6-rho6); double u = 4*epsilon*(rho6*rho6-rho6);
RealOpenMM eta = SQRT(2*s[particle1]*s[particle2]/detG12); double eta = sqrt(2*s[particle1]*s[particle2]/detG12);
RealOpenMM chi = 2*drUnit.dot(B12inv*drUnit); double chi = 2*drUnit.dot(B12inv*drUnit);
chi *= chi; chi *= chi;
RealOpenMM energy = u*eta*chi; double energy = u*eta*chi;
// Compute the terms needed for the force. // Compute the terms needed for the force.
RealVec kappa = G12inv*dr; Vec3 kappa = G12inv*dr;
RealVec iota = B12inv*dr; Vec3 iota = B12inv*dr;
RealOpenMM rInv2 = rInv*rInv; double rInv2 = rInv*rInv;
RealOpenMM dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma; double dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma;
RealOpenMM temp = 0.5*sigma12*sigma12*sigma12*rInv2; double temp = 0.5*sigma12*sigma12*sigma12*rInv2;
RealVec dudr = (drUnit + (kappa-drUnit*kappa.dot(drUnit))*temp)*dUSLJdr; Vec3 dudr = (drUnit + (kappa-drUnit*kappa.dot(drUnit))*temp)*dUSLJdr;
RealVec dchidr = (iota-drUnit*iota.dot(drUnit))*(-8*rInv2*SQRT(chi)); Vec3 dchidr = (iota-drUnit*iota.dot(drUnit))*(-8*rInv2*sqrt(chi));
RealVec force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv); Vec3 force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv);
forces[4*particle1] += force[0]; forces[4*particle1] += force[0];
forces[4*particle1+1] += force[1]; forces[4*particle1+1] += force[1];
forces[4*particle1+2] += force[2]; forces[4*particle1+2] += force[2];
...@@ -413,13 +401,13 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, ...@@ -413,13 +401,13 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
ParticleInfo& p = particles[particle]; ParticleInfo& p = particles[particle];
if (p.isPointParticle) if (p.isPointParticle)
continue; continue;
RealVec dudq = (kappa*G[particle]).cross(kappa*(temp*dUSLJdr)); Vec3 dudq = (kappa*G[particle]).cross(kappa*(temp*dUSLJdr));
RealVec dchidq = (iota*B[particle]).cross(iota)*(-4*rInv2); Vec3 dchidq = (iota*B[particle]).cross(iota)*(-4*rInv2);
RealOpenMM (&g12)[3][3] = G12.v; double (&g12)[3][3] = G12.v;
RealOpenMM (&a)[3][3] = A[particle].v; double (&a)[3][3] = A[particle].v;
RealVec scale = RealVec(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz)*(-0.5*eta/detG12); Vec3 scale = Vec3(p.rx*p.rx, p.ry*p.ry, p.rz*p.rz)*(-0.5*eta/detG12);
Matrix D; Matrix D;
RealOpenMM (&d)[3][3] = D.v; double (&d)[3][3] = D.v;
d[0][0] = scale[0]*(2*a[0][0]*(g12[1][1]*g12[2][2] - g12[1][2]*g12[2][1]) + d[0][0] = scale[0]*(2*a[0][0]*(g12[1][1]*g12[2][2] - g12[1][2]*g12[2][1]) +
a[0][2]*(g12[1][2]*g12[0][1] + g12[1][0]*g12[2][1] - g12[1][1]*(g12[0][2] + g12[2][0])) + a[0][2]*(g12[1][2]*g12[0][1] + g12[1][0]*g12[2][1] - g12[1][1]*(g12[0][2] + g12[2][0])) +
a[0][1]*(g12[0][2]*g12[2][1] + g12[2][0]*g12[1][2] - g12[2][2]*(g12[0][1] + g12[1][0]))); a[0][1]*(g12[0][2]*g12[2][1] + g12[2][0]*g12[1][2] - g12[2][2]*(g12[0][1] + g12[1][0])));
...@@ -447,10 +435,10 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2, ...@@ -447,10 +435,10 @@ RealOpenMM CpuGayBerneForce::computeOneInteraction(int particle1, int particle2,
d[2][2] = scale[2]*( a[2][0]*(g12[0][1]*g12[1][2] + g12[2][1]*g12[1][0] - g12[1][1]*(g12[0][2] + g12[2][0])) + d[2][2] = scale[2]*( a[2][0]*(g12[0][1]*g12[1][2] + g12[2][1]*g12[1][0] - g12[1][1]*(g12[0][2] + g12[2][0])) +
a[2][1]*(g12[1][0]*g12[0][2] + g12[2][0]*g12[0][1] - g12[0][0]*(g12[1][2] + g12[2][1])) + a[2][1]*(g12[1][0]*g12[0][2] + g12[2][0]*g12[0][1] - g12[0][0]*(g12[1][2] + g12[2][1])) +
2*a[2][2]*(g12[1][1]*g12[0][0] - g12[1][0]*g12[0][1])); 2*a[2][2]*(g12[1][1]*g12[0][0] - g12[1][0]*g12[0][1]));
RealVec detadq; Vec3 detadq;
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
detadq += RealVec(a[i][0], a[i][1], a[i][2]).cross(RealVec(d[i][0], d[i][1], d[i][2])); detadq += Vec3(a[i][0], a[i][1], a[i][2]).cross(Vec3(d[i][0], d[i][1], d[i][2]));
RealVec torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue; Vec3 torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
torques[particle] -= torque; torques[particle] -= torque;
} }
return switchValue*energy; return switchValue*energy;
......
...@@ -41,43 +41,44 @@ ...@@ -41,43 +41,44 @@
#include "ReferenceTabulatedFunction.h" #include "ReferenceTabulatedFunction.h"
#include "openmm/Context.h" #include "openmm/Context.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "openmm/Vec3.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomNonbondedForceImpl.h" #include "openmm/internal/CustomNonbondedForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h" #include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include "RealVec.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "lepton/CustomFunction.h" #include "lepton/CustomFunction.h"
#include "lepton/Operation.h" #include "lepton/Operation.h"
#include "lepton/Parser.h" #include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
static vector<RealVec>& extractPositions(ContextImpl& context) { static vector<Vec3>& extractPositions(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData()); ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((vector<RealVec>*) data->positions); return *((vector<Vec3>*) data->positions);
} }
static vector<RealVec>& extractVelocities(ContextImpl& context) { static vector<Vec3>& extractVelocities(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData()); ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((vector<RealVec>*) data->velocities); return *((vector<Vec3>*) data->velocities);
} }
static vector<RealVec>& extractForces(ContextImpl& context) { static vector<Vec3>& extractForces(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData()); ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((vector<RealVec>*) data->forces); return *((vector<Vec3>*) data->forces);
} }
static RealVec& extractBoxSize(ContextImpl& context) { static Vec3& extractBoxSize(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData()); ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *(RealVec*) data->periodicBoxSize; return *(Vec3*) data->periodicBoxSize;
} }
static RealVec* extractBoxVectors(ContextImpl& context) { static Vec3* extractBoxVectors(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData()); ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return (RealVec*) data->periodicBoxVectors; return (Vec3*) data->periodicBoxVectors;
} }
static ReferenceConstraints& extractConstraints(ContextImpl& context) { static ReferenceConstraints& extractConstraints(ContextImpl& context) {
...@@ -106,14 +107,14 @@ static void validateVariables(const Lepton::ExpressionTreeNode& node, const set< ...@@ -106,14 +107,14 @@ static void validateVariables(const Lepton::ExpressionTreeNode& node, const set<
* for a leapfrog integrator. * for a leapfrog integrator.
*/ */
static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& masses, double timeShift) { static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& masses, double timeShift) {
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<RealVec>& velData = extractVelocities(context); vector<Vec3>& velData = extractVelocities(context);
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
// Compute the shifted velocities. // Compute the shifted velocities.
vector<RealVec> shiftedVel(numParticles); vector<Vec3> shiftedVel(numParticles);
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
if (masses[i] > 0) if (masses[i] > 0)
shiftedVel[i] = velData[i]+forceData[i]*(timeShift/masses[i]); shiftedVel[i] = velData[i]+forceData[i]*(timeShift/masses[i]);
...@@ -137,40 +138,32 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& ...@@ -137,40 +138,32 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy; return 0.5*energy;
} }
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task { CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
public: CalcForcesAndEnergyKernel(name, platform), data(data) {
SumForceTask(int numParticles, vector<RealVec>& forceData, CpuPlatform::PlatformData& data) : numParticles(numParticles), forceData(forceData), data(data) { // Create a Reference platform version of this kernel.
}
void execute(ThreadPool& threads, int threadIndex) { ReferenceKernelFactory referenceFactory;
// Sum the contributions to forces that have been calculated by different threads. referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
}
int numParticles;
vector<RealVec>& forceData;
CpuPlatform::PlatformData& data;
};
class CpuCalcForcesAndEnergyKernel::InitForceTask : public ThreadPool::Task { void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
public: referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
InitForceTask(int numParticles, ContextImpl& context, CpuPlatform::PlatformData& data) : numParticles(numParticles), positionsValid(true), context(context), data(data) { lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
} }
void execute(ThreadPool& threads, int threadIndex) {
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
bool positionsValid = true;
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Convert the positions to single precision and apply periodic boundary conditions // Convert the positions to single precision and apply periodic boundary conditions
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
RealVec* boxVectors = extractBoxVectors(context); Vec3* boxVectors = extractBoxVectors(context);
double boxSize[3] = {boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]}; double boxSize[3] = {boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]};
double invBoxSize[3] = {1/boxVectors[0][0], 1/boxVectors[1][1], 1/boxVectors[2][2]}; double invBoxSize[3] = {1/boxVectors[0][0], 1/boxVectors[1][1], 1/boxVectors[2][2]};
bool triclinic = (boxVectors[0][1] != 0 || boxVectors[0][2] != 0 || boxVectors[1][0] != 0 || boxVectors[1][2] != 0 || boxVectors[2][0] != 0 || boxVectors[2][1] != 0); bool triclinic = (boxVectors[0][1] != 0 || boxVectors[0][2] != 0 || boxVectors[1][0] != 0 || boxVectors[1][2] != 0 || boxVectors[2][0] != 0 || boxVectors[2][1] != 0);
...@@ -181,7 +174,7 @@ public: ...@@ -181,7 +174,7 @@ public:
if (data.isPeriodic) { if (data.isPeriodic) {
if (triclinic) { if (triclinic) {
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
RealVec pos = posData[i]; Vec3 pos = posData[i];
pos -= boxVectors[2]*floor(pos[2]*invBoxSize[2]); pos -= boxVectors[2]*floor(pos[2]*invBoxSize[2]);
pos -= boxVectors[1]*floor(pos[1]*invBoxSize[1]); pos -= boxVectors[1]*floor(pos[1]*invBoxSize[1]);
pos -= boxVectors[0]*floor(pos[0]*invBoxSize[0]); pos -= boxVectors[0]*floor(pos[0]*invBoxSize[0]);
...@@ -193,7 +186,7 @@ public: ...@@ -193,7 +186,7 @@ public:
else { else {
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
for (int j = 0; j < 3; j++) { for (int j = 0; j < 3; j++) {
RealOpenMM x = posData[i][j]; double x = posData[i][j];
double base = floor(x*invBoxSize[j])*boxSize[j]; double base = floor(x*invBoxSize[j])*boxSize[j];
posq[4*i+j] = (float) (x-base); posq[4*i+j] = (float) (x-base);
} }
...@@ -218,36 +211,9 @@ public: ...@@ -218,36 +211,9 @@ public:
fvec4 zero(0.0f); fvec4 zero(0.0f);
for (int j = 0; j < numParticles; j++) for (int j = 0; j < numParticles; j++)
zero.store(&data.threadForce[threadIndex][j*4]); zero.store(&data.threadForce[threadIndex][j*4]);
} });
int numParticles;
bool positionsValid;
ContextImpl& context;
CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
InitForceTask task(numParticles, context, data);
data.threads.execute(task);
data.threads.waitForThreads(); data.threads.waitForThreads();
if (!task.positionsValid) if (!positionsValid)
throw OpenMMException("Particle coordinate is nan"); throw OpenMMException("Particle coordinate is nan");
// Determine whether we need to recompute the neighbor list. // Determine whether we need to recompute the neighbor list.
...@@ -259,9 +225,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -259,9 +225,9 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double farCutoff2 = 0.5*padding*padding; double farCutoff2 = 0.5*padding*padding;
int maxNumMoved = numParticles/10; int maxNumMoved = numParticles/10;
vector<int> moved; vector<int> moved;
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
RealVec delta = posData[i]-lastPositions[i]; Vec3 delta = posData[i]-lastPositions[i];
double dist2 = delta.dot(delta); double dist2 = delta.dot(delta);
if (dist2 > closeCutoff2) { if (dist2 > closeCutoff2) {
moved.push_back(i); moved.push_back(i);
...@@ -280,11 +246,11 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -280,11 +246,11 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double paddedCutoff2 = data.paddedCutoff*data.paddedCutoff; double paddedCutoff2 = data.paddedCutoff*data.paddedCutoff;
for (int i = 1; i < numMoved && !needRecompute; i++) for (int i = 1; i < numMoved && !needRecompute; i++)
for (int j = 0; j < i; j++) { for (int j = 0; j < i; j++) {
RealVec delta = posData[moved[i]]-posData[moved[j]]; Vec3 delta = posData[moved[i]]-posData[moved[j]];
if (delta.dot(delta) < cutoff2) { if (delta.dot(delta) < cutoff2) {
// These particles should interact. See if they are in the neighbor list. // These particles should interact. See if they are in the neighbor list.
RealVec oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]]; Vec3 oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]];
if (oldDelta.dot(oldDelta) > paddedCutoff2) { if (oldDelta.dot(oldDelta) > paddedCutoff2) {
needRecompute = true; needRecompute = true;
break; break;
...@@ -302,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -302,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) { double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
// Sum the forces from all the threads. // Sum the forces from all the threads.
SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data); data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
data.threads.execute(task); // Sum the contributions to forces that have been calculated by different threads.
int numParticles = context.getSystem().getNumParticles();
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
vector<Vec3>& forceData = extractForces(context);
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
});
data.threads.waitForThreads(); data.threads.waitForThreads();
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid); return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
} }
...@@ -324,9 +305,9 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har ...@@ -324,9 +305,9 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har
angleIndexArray = new int*[numAngles]; angleIndexArray = new int*[numAngles];
for (int i = 0; i < numAngles; i++) for (int i = 0; i < numAngles; i++)
angleIndexArray[i] = new int[3]; angleIndexArray[i] = new int[3];
angleParamArray = new RealOpenMM*[numAngles]; angleParamArray = new double*[numAngles];
for (int i = 0; i < numAngles; i++) for (int i = 0; i < numAngles; i++)
angleParamArray[i] = new RealOpenMM[2]; angleParamArray[i] = new double[2];
for (int i = 0; i < numAngles; ++i) { for (int i = 0; i < numAngles; ++i) {
int particle1, particle2, particle3; int particle1, particle2, particle3;
double angle, k; double angle, k;
...@@ -334,17 +315,17 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har ...@@ -334,17 +315,17 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har
angleIndexArray[i][0] = particle1; angleIndexArray[i][0] = particle1;
angleIndexArray[i][1] = particle2; angleIndexArray[i][1] = particle2;
angleIndexArray[i][2] = particle3; angleIndexArray[i][2] = particle3;
angleParamArray[i][0] = (RealOpenMM) angle; angleParamArray[i][0] = angle;
angleParamArray[i][1] = (RealOpenMM) k; angleParamArray[i][1] = k;
} }
bondForce.initialize(system.getNumParticles(), numAngles, 3, angleIndexArray, data.threads); bondForce.initialize(system.getNumParticles(), numAngles, 3, angleIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions(); usePeriodic = force.usesPeriodicBoundaryConditions();
} }
double CpuCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
RealOpenMM energy = 0; double energy = 0;
ReferenceAngleBondIxn angleBond; ReferenceAngleBondIxn angleBond;
if (usePeriodic) if (usePeriodic)
angleBond.setPeriodic(extractBoxVectors(context)); angleBond.setPeriodic(extractBoxVectors(context));
...@@ -364,8 +345,8 @@ void CpuCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& conte ...@@ -364,8 +345,8 @@ void CpuCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& conte
force.getAngleParameters(i, particle1, particle2, particle3, angle, k); force.getAngleParameters(i, particle1, particle2, particle3, angle, k);
if (particle1 != angleIndexArray[i][0] || particle2 != angleIndexArray[i][1] || particle3 != angleIndexArray[i][2]) if (particle1 != angleIndexArray[i][0] || particle2 != angleIndexArray[i][1] || particle3 != angleIndexArray[i][2])
throw OpenMMException("updateParametersInContext: The set of particles in an angle has changed"); throw OpenMMException("updateParametersInContext: The set of particles in an angle has changed");
angleParamArray[i][0] = (RealOpenMM) angle; angleParamArray[i][0] = angle;
angleParamArray[i][1] = (RealOpenMM) k; angleParamArray[i][1] = k;
} }
} }
...@@ -385,9 +366,9 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P ...@@ -385,9 +366,9 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P
torsionIndexArray = new int*[numTorsions]; torsionIndexArray = new int*[numTorsions];
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
torsionIndexArray[i] = new int[4]; torsionIndexArray[i] = new int[4];
torsionParamArray = new RealOpenMM*[numTorsions]; torsionParamArray = new double*[numTorsions];
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
torsionParamArray[i] = new RealOpenMM[3]; torsionParamArray[i] = new double[3];
for (int i = 0; i < numTorsions; ++i) { for (int i = 0; i < numTorsions; ++i) {
int particle1, particle2, particle3, particle4, periodicity; int particle1, particle2, particle3, particle4, periodicity;
double phase, k; double phase, k;
...@@ -396,18 +377,18 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P ...@@ -396,18 +377,18 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P
torsionIndexArray[i][1] = particle2; torsionIndexArray[i][1] = particle2;
torsionIndexArray[i][2] = particle3; torsionIndexArray[i][2] = particle3;
torsionIndexArray[i][3] = particle4; torsionIndexArray[i][3] = particle4;
torsionParamArray[i][0] = (RealOpenMM) k; torsionParamArray[i][0] = k;
torsionParamArray[i][1] = (RealOpenMM) phase; torsionParamArray[i][1] = phase;
torsionParamArray[i][2] = (RealOpenMM) periodicity; torsionParamArray[i][2] = periodicity;
} }
bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads); bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions(); usePeriodic = force.usesPeriodicBoundaryConditions();
} }
double CpuCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
RealOpenMM energy = 0; double energy = 0;
ReferenceProperDihedralBond periodicTorsionBond; ReferenceProperDihedralBond periodicTorsionBond;
if (usePeriodic) if (usePeriodic)
periodicTorsionBond.setPeriodic(extractBoxVectors(context)); periodicTorsionBond.setPeriodic(extractBoxVectors(context));
...@@ -427,9 +408,9 @@ void CpuCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& con ...@@ -427,9 +408,9 @@ void CpuCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& con
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, periodicity, phase, k); force.getTorsionParameters(i, particle1, particle2, particle3, particle4, periodicity, phase, k);
if (particle1 != torsionIndexArray[i][0] || particle2 != torsionIndexArray[i][1] || particle3 != torsionIndexArray[i][2] || particle4 != torsionIndexArray[i][3]) if (particle1 != torsionIndexArray[i][0] || particle2 != torsionIndexArray[i][1] || particle3 != torsionIndexArray[i][2] || particle4 != torsionIndexArray[i][3])
throw OpenMMException("updateParametersInContext: The set of particles in a torsion has changed"); throw OpenMMException("updateParametersInContext: The set of particles in a torsion has changed");
torsionParamArray[i][0] = (RealOpenMM) k; torsionParamArray[i][0] = k;
torsionParamArray[i][1] = (RealOpenMM) phase; torsionParamArray[i][1] = phase;
torsionParamArray[i][2] = (RealOpenMM) periodicity; torsionParamArray[i][2] = periodicity;
} }
} }
...@@ -449,9 +430,9 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi ...@@ -449,9 +430,9 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi
torsionIndexArray = new int*[numTorsions]; torsionIndexArray = new int*[numTorsions];
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
torsionIndexArray[i] = new int[4]; torsionIndexArray[i] = new int[4];
torsionParamArray = new RealOpenMM*[numTorsions]; torsionParamArray = new double*[numTorsions];
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
torsionParamArray[i] = new RealOpenMM[6]; torsionParamArray[i] = new double[6];
for (int i = 0; i < numTorsions; ++i) { for (int i = 0; i < numTorsions; ++i) {
int particle1, particle2, particle3, particle4; int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5; double c0, c1, c2, c3, c4, c5;
...@@ -460,21 +441,21 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi ...@@ -460,21 +441,21 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi
torsionIndexArray[i][1] = particle2; torsionIndexArray[i][1] = particle2;
torsionIndexArray[i][2] = particle3; torsionIndexArray[i][2] = particle3;
torsionIndexArray[i][3] = particle4; torsionIndexArray[i][3] = particle4;
torsionParamArray[i][0] = (RealOpenMM) c0; torsionParamArray[i][0] = c0;
torsionParamArray[i][1] = (RealOpenMM) c1; torsionParamArray[i][1] = c1;
torsionParamArray[i][2] = (RealOpenMM) c2; torsionParamArray[i][2] = c2;
torsionParamArray[i][3] = (RealOpenMM) c3; torsionParamArray[i][3] = c3;
torsionParamArray[i][4] = (RealOpenMM) c4; torsionParamArray[i][4] = c4;
torsionParamArray[i][5] = (RealOpenMM) c5; torsionParamArray[i][5] = c5;
} }
bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads); bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions(); usePeriodic = force.usesPeriodicBoundaryConditions();
} }
double CpuCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
RealOpenMM energy = 0; double energy = 0;
ReferenceRbDihedralBond rbTorsionBond; ReferenceRbDihedralBond rbTorsionBond;
if (usePeriodic) if (usePeriodic)
rbTorsionBond.setPeriodic(extractBoxVectors(context)); rbTorsionBond.setPeriodic(extractBoxVectors(context));
...@@ -494,12 +475,12 @@ void CpuCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -494,12 +475,12 @@ void CpuCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5); force.getTorsionParameters(i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
if (particle1 != torsionIndexArray[i][0] || particle2 != torsionIndexArray[i][1] || particle3 != torsionIndexArray[i][2] || particle4 != torsionIndexArray[i][3]) if (particle1 != torsionIndexArray[i][0] || particle2 != torsionIndexArray[i][1] || particle3 != torsionIndexArray[i][2] || particle4 != torsionIndexArray[i][3])
throw OpenMMException("updateParametersInContext: The set of particles in a torsion has changed"); throw OpenMMException("updateParametersInContext: The set of particles in a torsion has changed");
torsionParamArray[i][0] = (RealOpenMM) c0; torsionParamArray[i][0] = c0;
torsionParamArray[i][1] = (RealOpenMM) c1; torsionParamArray[i][1] = c1;
torsionParamArray[i][2] = (RealOpenMM) c2; torsionParamArray[i][2] = c2;
torsionParamArray[i][3] = (RealOpenMM) c3; torsionParamArray[i][3] = c3;
torsionParamArray[i][4] = (RealOpenMM) c4; torsionParamArray[i][4] = c4;
torsionParamArray[i][5] = (RealOpenMM) c5; torsionParamArray[i][5] = c5;
} }
} }
...@@ -528,7 +509,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4(); ...@@ -528,7 +509,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8(); CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform), CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) { data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
if (isVec8Supported()) if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8(); nonbonded = createCpuNonbondedForceVec8();
else else
...@@ -575,12 +556,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -575,12 +556,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++) for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3]; bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles); particleParams.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge; data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
...@@ -592,9 +575,9 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -592,9 +575,9 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
force.getExceptionParameters(nb14s[i], particle1, particle2, charge, radius, depth); force.getExceptionParameters(nb14s[i], particle1, particle2, charge, radius, depth);
bonded14IndexArray[i][0] = particle1; bonded14IndexArray[i][0] = particle1;
bonded14IndexArray[i][1] = particle2; bonded14IndexArray[i][1] = particle2;
bonded14ParamArray[i][0] = static_cast<RealOpenMM>(radius); bonded14ParamArray[i][0] = radius;
bonded14ParamArray[i][1] = static_cast<RealOpenMM>(4.0*depth); bonded14ParamArray[i][1] = 4.0*depth;
bonded14ParamArray[i][2] = static_cast<RealOpenMM>(charge); bonded14ParamArray[i][2] = charge;
} }
bondForce.initialize(system.getNumParticles(), num14, 2, bonded14IndexArray, data.threads); bondForce.initialize(system.getNumParticles(), num14, 2, bonded14IndexArray, data.threads);
...@@ -616,19 +599,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -616,19 +599,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
} }
else if (nonbondedMethod == PME) { else if (nonbondedMethod == PME) {
double alpha; double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]); NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha; ewaldAlpha = alpha;
} }
if (nonbondedMethod == Ewald || nonbondedMethod == PME) else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = alpha;
useSwitchingFunction = false;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI); ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
else if(nonbondedMethod == LJPME){
for (int atom = 0; atom < numParticles; atom++) {
// Dispersion self term
ewaldSelfEnergy += pow(ewaldDispersionAlpha, 6.0) * C6params[atom]*C6params[atom] / 12.0;
}
}
} else {
ewaldSelfEnergy = 0.0; ewaldSelfEnergy = 0.0;
}
rfDielectric = force.getReactionFieldDielectric(); rfDielectric = force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection()) if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force); dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME); data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
} }
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
...@@ -646,18 +645,33 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -646,18 +645,33 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
} }
} }
if (nonbondedMethod == LJPME) {
// If available, use the optimized PME implementation.
vector<string> kernelNames;
kernelNames.push_back("CalcPmeReciprocalForce");
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
}
}
} }
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
RealVec* boxVectors = extractBoxVectors(context); Vec3* boxVectors = extractBoxVectors(context);
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric); nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) { if (data.isPeriodic) {
RealVec* boxVectors = extractBoxVectors(context); Vec3* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff; double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
...@@ -669,9 +683,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -669,9 +683,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded->setUsePME(ewaldAlpha, gridSize); nonbonded->setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction) if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance); nonbonded->setUseSwitchingFunction(switchingDistance);
if (ljpme){
nonbonded->setUsePME(ewaldAlpha, gridSize);
nonbonded->setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
double nonbondedEnergy = 0; double nonbondedEnergy = 0;
if (includeDirect) if (includeDirect)
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads); nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles); PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
...@@ -680,13 +698,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -680,13 +698,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io); nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
} }
else else
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL); nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
} }
energy += nonbondedEnergy; energy += nonbondedEnergy;
if (includeDirect) { if (includeDirect) {
ReferenceLJCoulomb14 nonbonded14; ReferenceLJCoulomb14 nonbonded14;
bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14); bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic) if (data.isPeriodic && nonbondedMethod != LJPME)
energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]); energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
} }
return energy; return energy;
...@@ -726,9 +744,9 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -726,9 +744,9 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
force.getExceptionParameters(nb14s[i], particle1, particle2, charge, radius, depth); force.getExceptionParameters(nb14s[i], particle1, particle2, charge, radius, depth);
bonded14IndexArray[i][0] = particle1; bonded14IndexArray[i][0] = particle1;
bonded14IndexArray[i][1] = particle2; bonded14IndexArray[i][1] = particle2;
bonded14ParamArray[i][0] = static_cast<RealOpenMM>(radius); bonded14ParamArray[i][0] = radius;
bonded14ParamArray[i][1] = static_cast<RealOpenMM>(4.0*depth); bonded14ParamArray[i][1] = 4.0*depth;
bonded14ParamArray[i][2] = static_cast<RealOpenMM>(charge); bonded14ParamArray[i][2] = charge;
} }
// Recompute the coefficient for the dispersion correction. // Recompute the coefficient for the dispersion correction.
...@@ -739,7 +757,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -739,7 +757,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
} }
void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME) if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME"); throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme) if (useOptimizedPme)
optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz); optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
...@@ -751,6 +769,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ...@@ -751,6 +769,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
} }
} }
/**
 * Report the dispersion (LJ) PME parameters in use by this kernel.
 *
 * @param alpha  receives the dispersion Ewald separation parameter
 * @param nx     receives the first dimension of the dispersion PME grid
 * @param ny     receives the second dimension of the dispersion PME grid
 * @param nz     receives the third dimension of the dispersion PME grid
 * @throws OpenMMException if the nonbonded method is not LJPME
 */
void CpuCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
    if (nonbondedMethod != LJPME)
        throw OpenMMException("getLJPMEParametersInContext: This Context is not using LJPME");
    if (useOptimizedPme) {
        // optimizedDispersionPme is created and initialized as a CalcDispersionPmeReciprocalForceKernel
        // in execute(), so it has to be queried through that same kernel type.
        optimizedDispersionPme.getAs<const CalcDispersionPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
    }
    else {
        // No optimized kernel: report the values computed in initialize().
        alpha = ewaldDispersionAlpha;
        nx = dispersionGridSize[0];
        ny = dispersionGridSize[1];
        nz = dispersionGridSize[2];
    }
}
/**
 * Create the kernel. Only the platform data reference is stored here; the
 * force copy and the nonbonded computation object start out null.
 */
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data)
        : CalcCustomNonbondedForceKernel(name, platform),
          data(data),
          forceCopy(NULL),
          nonbonded(NULL) {
}
...@@ -864,9 +895,9 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C ...@@ -864,9 +895,9 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
} }
double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
RealVec* boxVectors = extractBoxVectors(context); Vec3* boxVectors = extractBoxVectors(context);
double energy = 0; double energy = 0;
bool periodic = (nonbondedMethod == CutoffPeriodic); bool periodic = (nonbondedMethod == CutoffPeriodic);
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
...@@ -953,7 +984,7 @@ void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCFo ...@@ -953,7 +984,7 @@ void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCFo
double CpuCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (data.isPeriodic) { if (data.isPeriodic) {
RealVec& boxSize = extractBoxSize(context); Vec3& boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]}; float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
obc.setPeriodic(floatBoxSize); obc.setPeriodic(floatBoxSize);
} }
...@@ -1024,14 +1055,14 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB ...@@ -1024,14 +1055,14 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
vector<double> parameters; vector<double> parameters;
force.getParticleParameters(i, parameters); force.getParticleParameters(i, parameters);
for (int j = 0; j < numPerParticleParameters; j++) for (int j = 0; j < numPerParticleParameters; j++)
particleParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]); particleParamArray[i][j] = parameters[j];
} }
for (int i = 0; i < numPerParticleParameters; i++) for (int i = 0; i < numPerParticleParameters; i++)
particleParameterNames.push_back(force.getPerParticleParameterName(i)); particleParameterNames.push_back(force.getPerParticleParameterName(i));
for (int i = 0; i < force.getNumGlobalParameters(); i++) for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i)); globalParameterNames.push_back(force.getGlobalParameterName(i));
nonbondedMethod = CalcCustomGBForceKernel::NonbondedMethod(force.getNonbondedMethod()); nonbondedMethod = CalcCustomGBForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = (RealOpenMM) force.getCutoffDistance(); nonbondedCutoff = force.getCutoffDistance();
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, force.getNumExclusions() > 0, exclusions); data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, force.getNumExclusions() > 0, exclusions);
...@@ -1133,9 +1164,9 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB ...@@ -1133,9 +1164,9 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
} }
double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
RealOpenMM energy = 0; double energy = 0;
RealVec* boxVectors = extractBoxVectors(context); Vec3* boxVectors = extractBoxVectors(context);
if (data.isPeriodic) if (data.isPeriodic)
ixn->setPeriodic(extractBoxSize(context)); ixn->setPeriodic(extractBoxSize(context));
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
...@@ -1165,7 +1196,7 @@ void CpuCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, c ...@@ -1165,7 +1196,7 @@ void CpuCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, c
vector<double> parameters; vector<double> parameters;
force.getParticleParameters(i, parameters); force.getParticleParameters(i, parameters);
for (int j = 0; j < numParameters; j++) for (int j = 0; j < numParameters; j++)
particleParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]); particleParamArray[i][j] = static_cast<double>(parameters[j]);
} }
} }
...@@ -1208,7 +1239,7 @@ double CpuCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -1208,7 +1239,7 @@ double CpuCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
for (int i = 0; i < (int) globalParameterNames.size(); i++) for (int i = 0; i < (int) globalParameterNames.size(); i++)
globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]); globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]);
if (nonbondedMethod == CutoffPeriodic) { if (nonbondedMethod == CutoffPeriodic) {
RealVec* boxVectors = extractBoxVectors(context); Vec3* boxVectors = extractBoxVectors(context);
double minAllowedSize = 2*cutoffDistance; double minAllowedSize = 2*cutoffDistance;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
...@@ -1232,7 +1263,7 @@ void CpuCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImpl& ...@@ -1232,7 +1263,7 @@ void CpuCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImpl&
int type; int type;
force.getParticleParameters(i, parameters, type); force.getParticleParameters(i, parameters, type);
for (int j = 0; j < numParameters; j++) for (int j = 0; j < numParameters; j++)
particleParamArray[i][j] = static_cast<RealOpenMM>(parameters[j]); particleParamArray[i][j] = static_cast<double>(parameters[j]);
} }
} }
...@@ -1269,7 +1300,7 @@ void CpuIntegrateLangevinStepKernel::initialize(const System& system, const Lang ...@@ -1269,7 +1300,7 @@ void CpuIntegrateLangevinStepKernel::initialize(const System& system, const Lang
int numParticles = system.getNumParticles(); int numParticles = system.getNumParticles();
masses.resize(numParticles); masses.resize(numParticles);
for (int i = 0; i < numParticles; ++i) for (int i = 0; i < numParticles; ++i)
masses[i] = static_cast<RealOpenMM>(system.getParticleMass(i)); masses[i] = static_cast<double>(system.getParticleMass(i));
data.random.initialize(integrator.getRandomNumberSeed(), data.threads.getNumThreads()); data.random.initialize(integrator.getRandomNumberSeed(), data.threads.getNumThreads());
} }
...@@ -1277,16 +1308,15 @@ void CpuIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langevi ...@@ -1277,16 +1308,15 @@ void CpuIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langevi
double temperature = integrator.getTemperature(); double temperature = integrator.getTemperature();
double friction = integrator.getFriction(); double friction = integrator.getFriction();
double stepSize = integrator.getStepSize(); double stepSize = integrator.getStepSize();
vector<RealVec>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
vector<RealVec>& velData = extractVelocities(context); vector<Vec3>& velData = extractVelocities(context);
vector<RealVec>& forceData = extractForces(context); vector<Vec3>& forceData = extractForces(context);
if (dynamics == 0 || temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) { if (dynamics == 0 || temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
// Recreate the computation objects with the new parameters. // Recreate the computation objects with the new parameters.
if (dynamics) if (dynamics)
delete dynamics; delete dynamics;
RealOpenMM tau = (friction == 0.0 ? 0.0 : 1.0/friction); dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, friction, temperature, data.threads, data.random);
dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, tau, temperature, data.threads, data.random);
dynamics->setReferenceConstraintAlgorithm(&extractConstraints(context)); dynamics->setReferenceConstraintAlgorithm(&extractConstraints(context));
prevTemp = temperature; prevTemp = temperature;
prevFriction = friction; prevFriction = friction;
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Authors: Peter Eastman * Authors: Peter Eastman
* Contributors: * Contributors:
* *
...@@ -29,45 +29,15 @@ ...@@ -29,45 +29,15 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
// NOTE(review): the lines below carry two revisions of this file side by side.
//  - Earlier revision: Update1Task / Update2Task / Update3Task are ThreadPool::Task
//    adapters whose execute() forwards to owner.threadUpdate1/2/3(threadIndex), followed
//    by a constructor taking (numberOfAtoms, deltaT, tau, temperature, threads, random).
//  - Later revision: only a constructor taking (numberOfAtoms, deltaT, friction,
//    temperature, threads, random) remains; it forwards the first four arguments to
//    ReferenceStochasticDynamics and stores references to the thread pool and the
//    random number generator.
// The destructor at the end has an empty body in both revisions.
class CpuLangevinDynamics::Update1Task : public ThreadPool::Task { CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, double deltaT, double friction, double temperature, ThreadPool& threads, CpuRandom& random) :
public: ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
Update1Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate1(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
public:
Update2Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate2(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update3Task : public ThreadPool::Task {
public:
Update3Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate3(threadIndex);
}
CpuLangevinDynamics& owner;
};
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, tau, temperature), threads(threads), random(random) {
} }
CpuLangevinDynamics::~CpuLangevinDynamics() { CpuLangevinDynamics::~CpuLangevinDynamics() {
} }
void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities, void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
vector<RealVec>& forces, vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) { vector<Vec3>& forces, vector<double>& inverseMasses, vector<Vec3>& xPrime) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
...@@ -79,13 +49,12 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo ...@@ -79,13 +49,12 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update1Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate1(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities, void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
vector<RealVec>& forces, vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) { vector<Vec3>& forces, vector<double>& inverseMasses, vector<Vec3>& xPrime) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
...@@ -97,13 +66,12 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo ...@@ -97,13 +66,12 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update2Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate2(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities, void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<Vec3>& atomCoordinates, vector<Vec3>& velocities,
vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) { vector<double>& inverseMasses, vector<Vec3>& xPrime) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
...@@ -114,44 +82,44 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo ...@@ -114,44 +82,44 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update3Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate3(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
void CpuLangevinDynamics::threadUpdate1(int threadIndex) { void CpuLangevinDynamics::threadUpdate1(int threadIndex) {
const RealOpenMM tau = getTau(); double dt = getDeltaT();
const RealOpenMM vscale = EXP(-getDeltaT()/tau); double friction = getFriction();
const RealOpenMM fscale = (1-vscale)*tau; const double vscale = exp(-dt*friction);
const RealOpenMM kT = BOLTZ*getTemperature(); const double fscale = (friction == 0 ? dt : (1-vscale)/friction);
const RealOpenMM noisescale = SQRT(2*kT/tau)*SQRT(0.5*(1-vscale*vscale)*tau); const double kT = BOLTZ*getTemperature();
const double noisescale = sqrt(kT*(1-vscale*vscale));
int start = threadIndex*numberOfAtoms/threads.getNumThreads(); int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads(); int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
if (inverseMasses[i] != 0.0) { if (inverseMasses[i] != 0.0) {
RealOpenMM sqrtInvMass = SQRT(inverseMasses[i]); double sqrtInvMass = sqrt(inverseMasses[i]);
RealVec noise(random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex)); Vec3 noise(random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex), random.getGaussianRandom(threadIndex));
velocities[i] = velocities[i]*vscale + forces[i]*(fscale*inverseMasses[i]) + noise*(noisescale*sqrtInvMass); velocities[i] = velocities[i]*vscale + forces[i]*(fscale*inverseMasses[i]) + noise*(noisescale*sqrtInvMass);
} }
} }
} }
void CpuLangevinDynamics::threadUpdate2(int threadIndex) { void CpuLangevinDynamics::threadUpdate2(int threadIndex) {
const RealOpenMM dt = getDeltaT(); const double dt = getDeltaT();
int start = threadIndex*numberOfAtoms/threads.getNumThreads(); int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads(); int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
if (inverseMasses[i] != 0.0) { if (inverseMasses[i] != 0.0) {
RealOpenMM sqrtInvMass = SQRT(inverseMasses[i]); double sqrtInvMass = sqrt(inverseMasses[i]);
xPrime[i] = atomCoordinates[i]+velocities[i]*dt; xPrime[i] = atomCoordinates[i]+velocities[i]*dt;
} }
} }
} }
void CpuLangevinDynamics::threadUpdate3(int threadIndex) { void CpuLangevinDynamics::threadUpdate3(int threadIndex) {
const RealOpenMM invStepSize = 1.0/getDeltaT(); const double invStepSize = 1.0/getDeltaT();
int start = threadIndex*numberOfAtoms/threads.getNumThreads(); int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads(); int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. * * Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
*/ */
class CpuNeighborList::Voxels { class CpuNeighborList::Voxels {
public: public:
Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) : Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const Vec3* boxVectors, bool usePeriodic) :
blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) { blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
...@@ -409,21 +409,11 @@ private: ...@@ -409,21 +409,11 @@ private:
vector<vector<vector<pair<float, int> > > > bins; vector<vector<vector<pair<float, int> > > > bins;
}; };
/**
 * ThreadPool task that runs CpuNeighborList::threadComputeNeighborList() on each
 * worker thread. It holds only a reference, so the neighbor list must outlive the task.
 */
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
public:
    CpuNeighborList& owner;

    ThreadTask(CpuNeighborList& neighborList) : owner(neighborList) {
    }

    /** Forward the pool and the worker's index to the owning neighbor list. */
    void execute(ThreadPool& pool, int workerIndex) {
        owner.threadComputeNeighborList(pool, workerIndex);
    }
};
/**
 * Create an empty neighbor list.
 *
 * @param blockSize  number of atoms grouped into each block
 */
CpuNeighborList::CpuNeighborList(int blockSize)
        : blockSize(blockSize) {
}
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions, void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
const RealVec* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) { const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
int numBlocks = (numAtoms+blockSize-1)/blockSize; int numBlocks = (numAtoms+blockSize-1)/blockSize;
blockNeighbors.resize(numBlocks); blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks); blockExclusions.resize(numBlocks);
...@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Sort the atoms based on a Hilbert curve. // Sort the atoms based on a Hilbert curve.
atomBins.resize(numAtoms); atomBins.resize(numAtoms);
ThreadTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
sort(atomBins.begin(), atomBins.end()); sort(atomBins.begin(), atomBins.end());
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ReferencePME.h" #include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h" #include "openmm/internal/gmx_atomic.h"
#include <algorithm> #include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -41,23 +42,14 @@ using namespace OpenMM; ...@@ -41,23 +42,14 @@ using namespace OpenMM;
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M)); const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048; const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
/**
 * Thread pool task that forwards every execute() callback to the owning
 * CpuNonbondedForce's threadComputeDirect().
 */
class CpuNonbondedForce::ComputeDirectTask : public ThreadPool::Task {
public:
    CpuNonbondedForce& owner;
    ComputeDirectTask(CpuNonbondedForce& force) : owner(force) {
    }
    void execute(ThreadPool& pool, int workerIndex) {
        owner.threadComputeDirect(pool, workerIndex);
    }
};
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor CpuNonbondedForce constructor
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) { CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
} }
CpuNonbondedForce::~CpuNonbondedForce() { CpuNonbondedForce::~CpuNonbondedForce() {
...@@ -78,10 +70,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig ...@@ -78,10 +70,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid = false; tableIsValid = false;
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors; neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0); krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0); crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
} if(alphaDispersionEwald != 0.0f){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -96,7 +99,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -96,7 +99,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance = distance; switchingDistance = distance;
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff also been set, and the smallest side of the periodic box is at least twice the cutoff
...@@ -106,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -106,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) { void CpuNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(cutoff); assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
...@@ -124,11 +127,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -124,11 +127,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0); periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0); periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 || triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 || periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0); periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Ewald summation. Set the force to use Ewald summation.
...@@ -139,18 +142,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -139,18 +142,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) { void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
numRx = kmaxx; numRx = kmaxx;
numRy = kmaxy; numRy = kmaxy;
numRz = kmaxz; numRz = kmaxz;
ewald = true; ewald = true;
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation. Set the force to use Particle-Mesh Ewald (PME) summation.
...@@ -159,19 +162,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -159,19 +162,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) { void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
meshDim[0] = meshSize[0]; meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1]; meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2]; meshDim[2] = meshSize[2];
pme = true; pme = true;
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
} }
void CpuNonbondedForce::tabulateEwaldScaleFactor() { /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param meshSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
    // A new dispersion separation parameter makes the cached exp-term tables stale.
    if (alpha != alphaDispersionEwald)
        expTableIsValid = false;
    alphaDispersionEwald = alpha;
    dispersionMeshDim[0] = meshSize[0];
    dispersionMeshDim[1] = meshSize[1];
    dispersionMeshDim[2] = meshSize[2];
    ljpme = true;

    // Rebuild the tables; this returns immediately when they are still valid.
    // NOTE(review): the tables are spaced by cutoffDistance/NUM_TABLE_POINTS, but only a
    // change of alpha invalidates them here. Confirm that setUseCutoff() is always called
    // first, or that it resets expTableIsValid when the cutoff changes.
    tabulateExpTerms();

    if (cutoffDistance != 0.0f) {
        // We set this here, in case setUseLJPME is called after the cutoff is set.
        // inverseRcut6Expterm = rc^-6 * (1 - exp(-(a*rc)^2) * (1 + (a*rc)^2 + (a*rc)^4/2)),
        // where a is the dispersion alpha and rc is the cutoff.
        double dalphaR = alphaDispersionEwald*cutoffDistance;
        double dar2 = dalphaR*dalphaR;
        double dar4 = dar2*dar2;
        double expterm = EXP(-dar2);
        inverseRcut6Expterm = inverseRcut6*(1.0 - expterm*(1.0 + dar2 + 0.5*dar4));
    }
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid) if (tableIsValid)
return; return;
tableIsValid = true; tableIsValid = true;
...@@ -187,10 +220,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -187,10 +220,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR); ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
} }
} }
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, void CpuNonbondedForce::tabulateExpTerms() {
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions, if (expTableIsValid)
vector<RealVec>& forces, double* totalEnergy) const { return;
expTableIsValid = true;
exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
exptermsDXInv = 1.0f/exptermsDX;
exptermsTable.resize(NUM_TABLE_POINTS+4);
dExptermsTable.resize(NUM_TABLE_POINTS+4);
for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
double r = i*ewaldDX;
double dalphaR = alphaDispersionEwald*r;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
exptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
vector<Vec3>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex; typedef std::complex<float> d_complex;
static const float epsilon = 1.0; static const float epsilon = 1.0;
...@@ -203,14 +256,37 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -203,14 +256,37 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if (pme) { if (pme) {
pme_t pmedata; pme_t pmedata;
pme_init(&pmedata, alphaEwald, numberOfAtoms, meshDim, 5, 1); pme_init(&pmedata, alphaEwald, numberOfAtoms, meshDim, 5, 1);
vector<RealOpenMM> charges(numberOfAtoms); vector<double> charges(numberOfAtoms);
for (int i = 0; i < numberOfAtoms; i++) for (int i = 0; i < numberOfAtoms; i++)
charges[i] = posq[4*i+3]; charges[i] = posq[4*i+3];
RealOpenMM recipEnergy = 0.0; double recipEnergy = 0.0;
pme_exec(pmedata, atomCoordinates, forces, charges, periodicBoxVectors, &recipEnergy); pme_exec(pmedata, atomCoordinates, forces, charges, periodicBoxVectors, &recipEnergy);
if (totalEnergy) if (totalEnergy)
*totalEnergy += recipEnergy; *totalEnergy += recipEnergy;
pme_destroy(pmedata); pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<Vec3> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = C6params[i];
dpmeforces.push_back(Vec3());
}
double recipDispersionEnergy = 0.0;
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
pme_destroy(pmedata);
}
} }
// Ewald method // Ewald method
...@@ -224,7 +300,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -224,7 +300,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors // setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z] #define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3); vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms); vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms); vector<d_complex> tab_qxyz(numberOfAtoms);
...@@ -232,15 +308,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -232,15 +308,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
for (int i = 0; (i < numberOfAtoms); i++) { for (int i = 0; (i < numberOfAtoms); i++) {
float* pos = posq+4*i; float* pos = posq+4*i;
for (int m = 0; (m < 3); m++) for (int m = 0; (m < 3); m++)
EIR(0, i, m) = d_complex(1,0); EIR(0, i, m) = d_complex(1,0);
for (int m=0; (m<3); m++) for (int m=0; (m<3); m++)
EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]), EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
sin(pos[m]*recipBoxSize[m])); sin(pos[m]*recipBoxSize[m]));
for (int j=2; (j<kmax); j++) for (int j=2; (j<kmax); j++)
for (int m=0; (m<3); m++) for (int m=0; (m<3); m++)
EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m); EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
} }
// calculate reciprocal space energy and forces // calculate reciprocal space energy and forces
...@@ -254,11 +330,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -254,11 +330,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
float ky = ry * recipBoxSize[1]; float ky = ry * recipBoxSize[1];
if (ry >= 0) { if (ry >= 0) {
for (int n = 0; n < numberOfAtoms; n++) for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1); tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
} }
else { else {
for (int n = 0; n < numberOfAtoms; n++) for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1)); tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
} }
for (int rz = lowrz; rz < numRz; rz++) { for (int rz = lowrz; rz < numRz; rz++) {
if (rz >= 0) { if (rz >= 0) {
...@@ -300,14 +376,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -300,14 +376,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
} }
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters, void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) { const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
this->posq = posq; this->posq = posq;
this->atomCoordinates = &atomCoordinates[0]; this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0]; this->atomParameters = &atomParameters[0];
this->C6params = &C6params[0];
this->exclusions = &exclusions[0]; this->exclusions = &exclusions[0];
this->threadForce = &threadForce; this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
...@@ -318,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -318,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to subtract the exclusions. // Signal the threads to subtract the exclusions.
...@@ -350,9 +426,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -350,9 +426,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float* forces = &(*threadForce)[threadIndex][0]; float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) { if (ewald || pme || ljpme) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
while (true) { while (true) {
int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1); int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (nextBlock >= neighborList->getNumBlocks()) if (nextBlock >= neighborList->getNumBlocks())
...@@ -370,7 +445,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -370,7 +445,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
break; break;
int end = min(start+groupSize, numberOfAtoms); int end = min(start+groupSize, numberOfAtoms);
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f); fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]); float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) { for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) { if (*iter > i) {
...@@ -394,7 +469,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -394,7 +469,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR; threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
} }
else if (includeEnergy) else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3]; threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
if (ljpme) {
float C6ij = C6params[i]*C6params[j];
float inverseR2 = 1.0f/r2;
float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
if(includeEnergy)
threadEnergy[threadIndex] += emult;
float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
}
} }
} }
} }
...@@ -444,7 +530,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -444,7 +530,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
} }
float sig = atomParameters[ii].first + atomParameters[jj].first; float sig = atomParameters[ii].first + atomParameters[jj].first;
float sig2 = inverseR*sig; float sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
float sig6 = sig2*sig2*sig2; float sig6 = sig2*sig2*sig2;
float eps = atomParameters[ii].second*atomParameters[jj].second; float eps = atomParameters[ii].second*atomParameters[jj].second;
...@@ -476,7 +562,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -476,7 +562,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii); (fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj); (fvec4(forces+4*jj)-result).store(forces+4*jj);
} }
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI; deltaR = posJ-posI;
...@@ -502,3 +588,18 @@ float CpuNonbondedForce::erfcApprox(float x) { ...@@ -502,3 +588,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return coeff1*erfcTable[index] + coeff2*erfcTable[index+1]; return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
} }
/**
 * Linearly interpolate exptermsTable at distance x.
 */
float CpuNonbondedForce::exptermsApprox(float x) {
    // Position of x measured in table intervals.
    const float tablePos = x*exptermsDXInv;

    // Lower table entry, clamped to NUM_TABLE_POINTS.
    int lower = (int) floor(tablePos);
    if (lower > NUM_TABLE_POINTS)
        lower = NUM_TABLE_POINTS;

    const float upperWeight = tablePos-lower;
    const float lowerWeight = 1.0f-upperWeight;
    return lowerWeight*exptermsTable[lower] + upperWeight*exptermsTable[lower+1];
}
/**
 * Linearly interpolate dExptermsTable at distance x.
 */
float CpuNonbondedForce::dExptermsApprox(float x) {
    // Position of x measured in table intervals.
    const float tablePos = x*exptermsDXInv;

    // Lower table entry, clamped to NUM_TABLE_POINTS.
    int lower = (int) floor(tablePos);
    if (lower > NUM_TABLE_POINTS)
        lower = NUM_TABLE_POINTS;

    const float upperWeight = tablePos-lower;
    const float lowerWeight = 1.0f-upperWeight;
    return lowerWeight*dExptermsTable[lower] + upperWeight*dExptermsTable[lower+1];
}
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h" #include "CpuNonbondedForceVec4.h"
#include <algorithm> #include <algorithm>
#include <iostream>
using namespace std; using namespace std;
using namespace OpenMM; using namespace OpenMM;
...@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions. // Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType; PeriodicType periodicType;
fvec4 blockCenter; fvec4 blockCenter;
if (!periodic) { if (!periodic) {
...@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces ...@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template <int PERIODIC_TYPE> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) { void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4]; fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
...@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]); fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first); fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second); fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
fvec4 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic); const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block. // Loop over neighbors for this block.
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
...@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 sig2 = inverseR*sig; fvec4 sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
fvec4 sig6 = sig2*sig2*sig2; fvec4 sig6 = sig2*sig2*sig2;
fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6; fvec4 eps = blockAtomEpsilon*atomEpsilon;
fvec4 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
...@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r; dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue; energy *= switchValue;
} }
if (ljpme) {
fvec4 C6ij = C6s*C6params[atom];
fvec4 inverseR2 = inverseR*inverseR;
fvec4 mysig2 = sig*sig;
fvec4 mysig6 = mysig2*mysig2*mysig2;
fvec4 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec4 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
} }
else { else {
energy = 0.0f; energy = 0.0f;
...@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
} }
// Record the forces on the block atoms. // Record the forces on the block atoms.
fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f}; fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
transpose(f[0], f[1], f[2], f[3]); transpose(f[0], f[1], f[2], f[3]);
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
...@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) { ...@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose(t1, t2, t3, t4); transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2; return coeff1*t1 + coeff2*t2;
} }
/**
 * Vectorized linear interpolation of exptermsTable for four distances at once.
 */
fvec4 CpuNonbondedForceVec4::exptermsApprox(const fvec4& r) {
    // Table position of each lane, and its lower entry clamped to NUM_TABLE_POINTS.
    fvec4 tablePos = r*exptermsDXInv;
    ivec4 lower = min(floor(tablePos), NUM_TABLE_POINTS);

    // Load a run of table entries starting at each lane's lower index.
    fvec4 row0(&exptermsTable[lower[0]]);
    fvec4 row1(&exptermsTable[lower[1]]);
    fvec4 row2(&exptermsTable[lower[2]]);
    fvec4 row3(&exptermsTable[lower[3]]);

    // Presumably the transpose leaves the four lower entries in row0 and the four
    // upper entries in row1, mirroring the scalar lookup.
    transpose(row0, row1, row2, row3);

    fvec4 upperWeight = tablePos-lower;
    fvec4 lowerWeight = 1.0f-upperWeight;
    return lowerWeight*row0 + upperWeight*row1;
}
/**
 * Vectorized linear interpolation of dExptermsTable for four distances at once.
 */
fvec4 CpuNonbondedForceVec4::dExptermsApprox(const fvec4& r) {
    // Table position of each lane, and its lower entry clamped to NUM_TABLE_POINTS.
    fvec4 tablePos = r*exptermsDXInv;
    ivec4 lower = min(floor(tablePos), NUM_TABLE_POINTS);

    // Load a run of table entries starting at each lane's lower index.
    fvec4 row0(&dExptermsTable[lower[0]]);
    fvec4 row1(&dExptermsTable[lower[1]]);
    fvec4 row2(&dExptermsTable[lower[2]]);
    fvec4 row3(&dExptermsTable[lower[3]]);

    // Presumably the transpose leaves the four lower entries in row0 and the four
    // upper entries in row1, mirroring the scalar lookup.
    transpose(row0, row1, row2, row3);

    fvec4 upperWeight = tablePos-lower;
    fvec4 lowerWeight = 1.0f-upperWeight;
    return lowerWeight*row0 + upperWeight*row1;
}
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
#include <algorithm> #include <algorithm>
#include <iostream>
using namespace std; using namespace std;
using namespace OpenMM; using namespace OpenMM;
...@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() { ...@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic}; enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions. // Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType; PeriodicType periodicType;
fvec4 blockCenter; fvec4 blockCenter;
if (!periodic) { if (!periodic) {
...@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge *= ONE_4PI_EPS0; blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first); fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second); fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
fvec8 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]], C6params[blockAtom[4]], C6params[blockAtom[5]], C6params[blockAtom[6]], C6params[blockAtom[7]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic); const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
...@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8 sig2 = inverseR*sig; fvec8 sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
fvec8 sig6 = sig2*sig2*sig2; fvec8 sig6 = sig2*sig2*sig2;
fvec8 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6; fvec8 eps = blockAtomEpsilon*atomEpsilon;
fvec8 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
...@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r; dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue; energy *= switchValue;
} }
if (ljpme) {
fvec8 C6ij = C6s*C6params[atom];
fvec8 inverseR2 = inverseR*inverseR;
fvec8 mysig2 = sig*sig;
fvec8 mysig6 = mysig2*mysig2*mysig2;
fvec8 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec8 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
} }
else { else {
energy = 0.0f; energy = 0.0f;
...@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) { ...@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4); transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2; return coeff1*s1 + coeff2*s2;
} }
/**
 * Evaluate the tabulated "expterms" function for eight distances at once.
 *
 * Each element of r is scaled by exptermsDXInv to obtain a position in exptermsTable. The
 * result blends the first two transposed table columns, weighted by how far the scaled
 * distance lies past its table slot.
 * NOTE(review): this reads as linear interpolation between adjacent table entries, which
 * assumes fvec4(const float*) loads consecutive floats and that the table is padded past
 * NUM_TABLE_POINTS - confirm against the vector headers and the table allocation.
 */
fvec8 CpuNonbondedForceVec8::exptermsApprox(const fvec8& r) {
    // Map the distances into table coordinates and cap the integer slot at NUM_TABLE_POINTS.
    fvec8 tablePos = r*exptermsDXInv;
    ivec8 slot = min(floor(tablePos), NUM_TABLE_POINTS);
    ivec4 slotLo = slot.lowerVec();
    ivec4 slotHi = slot.upperVec();

    // Fetch one table vector per element, each starting at that element's slot.
    fvec4 entry0(&exptermsTable[slotLo[0]]);
    fvec4 entry1(&exptermsTable[slotLo[1]]);
    fvec4 entry2(&exptermsTable[slotLo[2]]);
    fvec4 entry3(&exptermsTable[slotLo[3]]);
    fvec4 entry4(&exptermsTable[slotHi[0]]);
    fvec4 entry5(&exptermsTable[slotHi[1]]);
    fvec4 entry6(&exptermsTable[slotHi[2]]);
    fvec4 entry7(&exptermsTable[slotHi[3]]);

    // Regroup the eight loads into columns; only the first two columns are used below.
    fvec8 column0, column1, column2, column3;
    transpose(entry0, entry1, entry2, entry3, entry4, entry5, entry6, entry7, column0, column1, column2, column3);

    // Weight of the second column is the offset past the slot; the first column gets the rest.
    fvec8 upperWeight = tablePos-slot;
    fvec8 lowerWeight = 1.0f-upperWeight;
    return lowerWeight*column0 + upperWeight*column1;
}
/**
 * Evaluate the tabulated "dExpterms" function for eight distances at once.
 *
 * Each element of r is scaled by exptermsDXInv to obtain a position in dExptermsTable. The
 * result blends the first two transposed table columns, weighted by how far the scaled
 * distance lies past its table slot.
 * NOTE(review): this reads as linear interpolation between adjacent table entries, which
 * assumes fvec4(const float*) loads consecutive floats and that the table is padded past
 * NUM_TABLE_POINTS - confirm against the vector headers and the table allocation.
 */
fvec8 CpuNonbondedForceVec8::dExptermsApprox(const fvec8& r) {
    // Map the distances into table coordinates and cap the integer slot at NUM_TABLE_POINTS.
    fvec8 tablePos = r*exptermsDXInv;
    ivec8 slot = min(floor(tablePos), NUM_TABLE_POINTS);
    ivec4 slotLo = slot.lowerVec();
    ivec4 slotHi = slot.upperVec();

    // Fetch one table vector per element, each starting at that element's slot.
    fvec4 entry0(&dExptermsTable[slotLo[0]]);
    fvec4 entry1(&dExptermsTable[slotLo[1]]);
    fvec4 entry2(&dExptermsTable[slotLo[2]]);
    fvec4 entry3(&dExptermsTable[slotLo[3]]);
    fvec4 entry4(&dExptermsTable[slotHi[0]]);
    fvec4 entry5(&dExptermsTable[slotHi[1]]);
    fvec4 entry6(&dExptermsTable[slotHi[2]]);
    fvec4 entry7(&dExptermsTable[slotHi[3]]);

    // Regroup the eight loads into columns; only the first two columns are used below.
    fvec8 column0, column1, column2, column3;
    transpose(entry0, entry1, entry2, entry3, entry4, entry5, entry6, entry7, column0, column1, column2, column3);

    // Weight of the second column is the offset past the slot; the first column gets the rest.
    fvec8 upperWeight = tablePos-slot;
    fvec8 lowerWeight = 1.0f-upperWeight;
    return lowerWeight*column0 + upperWeight*column1;
}
#endif #endif
...@@ -127,6 +127,8 @@ void CpuPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -127,6 +127,8 @@ void CpuPlatform::contextDestroyed(ContextImpl& context) const {
PlatformData* data = contextData[&context]; PlatformData* data = contextData[&context];
delete data; delete data;
contextData.erase(&context); contextData.erase(&context);
ReferencePlatform::PlatformData* refPlatformData = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
delete refPlatformData;
} }
CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) { CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. * * Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -35,56 +35,10 @@ ...@@ -35,56 +35,10 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
/**
 * Thread pool task that calls apply() on every entry of threadSettle.
 * Each executing thread repeatedly takes the next value of a shared atomic counter and
 * processes the entry at that index, stopping once the counter passes the end of the list.
 */
class CpuSETTLE::ApplyToPositionsTask : public ThreadPool::Task {
public:
    ApplyToPositionsTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses,
            RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) :
            atomCoordinates(atomCoordinates),
            atomCoordinatesP(atomCoordinatesP),
            inverseMasses(inverseMasses),
            tolerance(tolerance),
            threadSettle(threadSettle) {
        // Work is handed out starting from entry 0.
        gmx_atomic_set(&atomicCounter, 0);
    }
    void execute(ThreadPool& threads, int threadIndex) {
        // The value returned by the atomic increment selects the entry this thread handles next.
        int next;
        while ((next = gmx_atomic_fetch_add(&atomicCounter, 1)) < threadSettle.size())
            threadSettle[next]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
    }
    // Arguments forwarded unchanged to every apply() call.
    vector<OpenMM::RealVec>& atomCoordinates;
    vector<OpenMM::RealVec>& atomCoordinatesP;
    vector<RealOpenMM>& inverseMasses;
    RealOpenMM tolerance;
    // The list of SETTLE objects to run, and the shared index of the next one to hand out.
    vector<ReferenceSETTLEAlgorithm*>& threadSettle;
    gmx_atomic_t atomicCounter;
};
/**
 * Thread pool task that calls applyToVelocities() on every entry of threadSettle.
 * Each executing thread repeatedly takes the next value of a shared atomic counter and
 * processes the entry at that index, stopping once the counter passes the end of the list.
 */
class CpuSETTLE::ApplyToVelocitiesTask : public ThreadPool::Task {
public:
    ApplyToVelocitiesTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses,
            RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) :
            atomCoordinates(atomCoordinates),
            velocities(velocities),
            inverseMasses(inverseMasses),
            tolerance(tolerance),
            threadSettle(threadSettle) {
        // Work is handed out starting from entry 0.
        gmx_atomic_set(&atomicCounter, 0);
    }
    void execute(ThreadPool& threads, int threadIndex) {
        // The value returned by the atomic increment selects the entry this thread handles next.
        int next;
        while ((next = gmx_atomic_fetch_add(&atomicCounter, 1)) < threadSettle.size())
            threadSettle[next]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
    }
    // Arguments forwarded unchanged to every applyToVelocities() call.
    vector<OpenMM::RealVec>& atomCoordinates;
    vector<OpenMM::RealVec>& velocities;
    vector<RealOpenMM>& inverseMasses;
    RealOpenMM tolerance;
    // The list of SETTLE objects to run, and the shared index of the next one to hand out.
    vector<ReferenceSETTLEAlgorithm*>& threadSettle;
    gmx_atomic_t atomicCounter;
};
CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) { CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) {
int numBlocks = 10*threads.getNumThreads(); int numBlocks = 10*threads.getNumThreads();
int numClusters = settle.getNumClusters(); int numClusters = settle.getNumClusters();
vector<RealOpenMM> mass(system.getNumParticles()); vector<double> mass(system.getNumParticles());
for (int i = 0; i < system.getNumParticles(); i++) for (int i = 0; i < system.getNumParticles(); i++)
mass[i] = system.getParticleMass(i); mass[i] = system.getParticleMass(i);
for (int i = 0; i < numBlocks; i++) { for (int i = 0; i < numBlocks; i++) {
...@@ -93,7 +47,7 @@ CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settl ...@@ -93,7 +47,7 @@ CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settl
if (start != end) { if (start != end) {
int numThreadClusters = end-start; int numThreadClusters = end-start;
vector<int> atom1(numThreadClusters), atom2(numThreadClusters), atom3(numThreadClusters); vector<int> atom1(numThreadClusters), atom2(numThreadClusters), atom3(numThreadClusters);
vector<RealOpenMM> distance1(numThreadClusters), distance2(numThreadClusters); vector<double> distance1(numThreadClusters), distance2(numThreadClusters);
for (int j = 0; j < numThreadClusters; j++) for (int j = 0; j < numThreadClusters; j++)
settle.getClusterParameters(start+j, atom1[j], atom2[j], atom3[j], distance1[j], distance2[j]); settle.getClusterParameters(start+j, atom1[j], atom2[j], atom3[j], distance1[j], distance2[j]);
threadSettle.push_back(new ReferenceSETTLEAlgorithm(atom1, atom2, atom3, distance1, distance2, mass)); threadSettle.push_back(new ReferenceSETTLEAlgorithm(atom1, atom2, atom3, distance1, distance2, mass));
...@@ -106,14 +60,30 @@ CpuSETTLE::~CpuSETTLE() { ...@@ -106,14 +60,30 @@ CpuSETTLE::~CpuSETTLE() {
delete threadSettle[i]; delete threadSettle[i];
} }
void CpuSETTLE::apply(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) { void CpuSETTLE::apply(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& atomCoordinatesP, vector<double>& inverseMasses, double tolerance) {
ApplyToPositionsTask task(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance, threadSettle); gmx_atomic_t atomicCounter;
threads.execute(task); gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
});
threads.waitForThreads(); threads.waitForThreads();
} }
void CpuSETTLE::applyToVelocities(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses, RealOpenMM tolerance) { void CpuSETTLE::applyToVelocities(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& velocities, vector<double>& inverseMasses, double tolerance) {
ApplyToVelocitiesTask task(atomCoordinates, velocities, inverseMasses, tolerance, threadSettle); gmx_atomic_t atomicCounter;
threads.execute(task); gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
});
threads.waitForThreads(); threads.waitForThreads();
} }
...@@ -16,7 +16,6 @@ ENDFOREACH(file) ...@@ -16,7 +16,6 @@ ENDFOREACH(file)
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES}) ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB_STATIC}) TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB_STATIC})
#-DPTW32_STATIC_LIB only works for the windows pthreads. SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY")
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET}) INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuTests.h"
#include "TestDispersionPME.h"
/**
 * Defined with an empty body, so calling it does nothing.
 * NOTE(review): presumably this is the hook the shared test code in TestDispersionPME.h
 * calls for platform-specific tests, with none needed here - confirm against that header.
 */
void runPlatformTests() {
}
...@@ -51,16 +51,16 @@ using namespace std; ...@@ -51,16 +51,16 @@ using namespace std;
void testNeighborList(bool periodic, bool triclinic) { void testNeighborList(bool periodic, bool triclinic) {
const int numParticles = 500; const int numParticles = 500;
const float cutoff = 2.0f; const float cutoff = 2.0f;
RealVec boxVectors[3]; Vec3 boxVectors[3];
if (triclinic) { if (triclinic) {
boxVectors[0] = RealVec(10, 0, 0); boxVectors[0] = Vec3(10, 0, 0);
boxVectors[1] = RealVec(4, 9, 0); boxVectors[1] = Vec3(4, 9, 0);
boxVectors[2] = RealVec(-3, -3.5, 11); boxVectors[2] = Vec3(-3, -3.5, 11);
} }
else { else {
boxVectors[0] = RealVec(10, 0, 0); boxVectors[0] = Vec3(10, 0, 0);
boxVectors[1] = RealVec(0, 9, 0); boxVectors[1] = Vec3(0, 9, 0);
boxVectors[2] = RealVec(0, 0, 11); boxVectors[2] = Vec3(0, 0, 11);
} }
const float boxSize[3] = {(float) boxVectors[0][0], (float) boxVectors[1][1], (float) boxVectors[2][2]}; const float boxSize[3] = {(float) boxVectors[0][0], (float) boxVectors[1][1], (float) boxVectors[2][2]};
const int blockSize = 8; const int blockSize = 8;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -494,6 +494,10 @@ public: ...@@ -494,6 +494,10 @@ public:
CudaNonbondedUtilities& getNonbondedUtilities() { CudaNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded; return *nonbonded;
} }
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/** /**
* Get the thread used by this context for executing parallel computations. * Get the thread used by this context for executing parallel computations.
*/ */
...@@ -577,6 +581,12 @@ public: ...@@ -577,6 +581,12 @@ public:
* and order to be revalidated. * and order to be revalidated.
*/ */
void invalidateMolecules(); void invalidateMolecules();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool invalidateMolecules(CudaForceInfo* force);
private: private:
/** /**
* Compute a sorted list of device indices in decreasing order of desirability * Compute a sorted list of device indices in decreasing order of desirability
...@@ -626,6 +636,7 @@ private: ...@@ -626,6 +636,7 @@ private:
CUfunction clearFourBuffersKernel; CUfunction clearFourBuffersKernel;
CUfunction clearFiveBuffersKernel; CUfunction clearFiveBuffersKernel;
CUfunction clearSixBuffersKernel; CUfunction clearSixBuffersKernel;
CUfunction setChargesKernel;
std::vector<CudaForceInfo*> forces; std::vector<CudaForceInfo*> forces;
std::vector<Molecule> molecules; std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups; std::vector<MoleculeGroup> moleculeGroups;
...@@ -638,6 +649,7 @@ private: ...@@ -638,6 +649,7 @@ private:
CudaArray* energyBuffer; CudaArray* energyBuffer;
CudaArray* energyParamDerivBuffer; CudaArray* energyParamDerivBuffer;
CudaArray* atomIndexDevice; CudaArray* atomIndexDevice;
CudaArray* chargeBuffer;
std::vector<std::string> energyParamDerivNames; std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace; std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex; std::vector<int> atomIndex;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. * * Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -198,7 +198,6 @@ public: ...@@ -198,7 +198,6 @@ public:
*/ */
void loadCheckpoint(ContextImpl& context, std::istream& stream); void loadCheckpoint(ContextImpl& context, std::istream& stream);
private: private:
class GetPositionsTask;
CudaContext& cu; CudaContext& cu;
}; };
...@@ -292,9 +291,11 @@ public: ...@@ -292,9 +291,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force); void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray* params;
}; };
...@@ -332,9 +333,11 @@ public: ...@@ -332,9 +333,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -375,9 +378,11 @@ public: ...@@ -375,9 +378,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force); void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private: private:
class ForceInfo;
int numAngles; int numAngles;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray* params;
}; };
...@@ -415,9 +420,11 @@ public: ...@@ -415,9 +420,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force); void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private: private:
class ForceInfo;
int numAngles; int numAngles;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -458,9 +465,11 @@ public: ...@@ -458,9 +465,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force); void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params; CudaArray* params;
}; };
...@@ -498,9 +507,11 @@ public: ...@@ -498,9 +507,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force); void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaArray* params1; CudaArray* params1;
CudaArray* params2; CudaArray* params2;
...@@ -539,9 +550,11 @@ public: ...@@ -539,9 +550,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force); void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
std::vector<int2> mapPositionsVec; std::vector<int2> mapPositionsVec;
CudaArray* coefficients; CudaArray* coefficients;
...@@ -582,9 +595,11 @@ public: ...@@ -582,9 +595,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force); void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private: private:
class ForceInfo;
int numTorsions; int numTorsions;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -599,7 +614,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel { ...@@ -599,7 +614,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform), CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL), cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) { pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL), pmeDispersionBsplineModuliY(NULL),
pmeDispersionBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
} }
~CudaCalcNonbondedForceKernel(); ~CudaCalcNonbondedForceKernel();
/** /**
...@@ -636,6 +652,15 @@ public: ...@@ -636,6 +652,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class SortTrait : public CudaSort::SortTrait { class SortTrait : public CudaSort::SortTrait {
int getDataSize() const {return 8;} int getDataSize() const {return 8;}
...@@ -647,12 +672,14 @@ private: ...@@ -647,12 +672,14 @@ private:
const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";} const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";}
const char* getSortKey() const {return "value.y";} const char* getSortKey() const {return "value.y";}
}; };
class ForceInfo;
class PmeIO; class PmeIO;
class PmePreComputation; class PmePreComputation;
class PmePostComputation; class PmePostComputation;
class SyncStreamPreComputation; class SyncStreamPreComputation;
class SyncStreamPostComputation; class SyncStreamPostComputation;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
bool hasInitializedFFT; bool hasInitializedFFT;
CudaArray* sigmaEpsilon; CudaArray* sigmaEpsilon;
CudaArray* exceptionParams; CudaArray* exceptionParams;
...@@ -662,6 +689,9 @@ private: ...@@ -662,6 +689,9 @@ private:
CudaArray* pmeBsplineModuliX; CudaArray* pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY; CudaArray* pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ; CudaArray* pmeBsplineModuliZ;
CudaArray* pmeDispersionBsplineModuliX;
CudaArray* pmeDispersionBsplineModuliY;
CudaArray* pmeDispersionBsplineModuliZ;
CudaArray* pmeAtomRange; CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex; CudaArray* pmeAtomGridIndex;
CudaArray* pmeEnergyBuffer; CudaArray* pmeEnergyBuffer;
...@@ -673,20 +703,29 @@ private: ...@@ -673,20 +703,29 @@ private:
CudaFFT3D* fft; CudaFFT3D* fft;
cufftHandle fftForward; cufftHandle fftForward;
cufftHandle fftBackward; cufftHandle fftBackward;
CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward;
CUfunction ewaldSumsKernel; CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel; CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel; CUfunction pmeGridIndexKernel;
CUfunction pmeDispersionGridIndexKernel;
CUfunction pmeSpreadChargeKernel; CUfunction pmeSpreadChargeKernel;
CUfunction pmeDispersionSpreadChargeKernel;
CUfunction pmeFinishSpreadChargeKernel; CUfunction pmeFinishSpreadChargeKernel;
CUfunction pmeDispersionFinishSpreadChargeKernel;
CUfunction pmeEvalEnergyKernel; CUfunction pmeEvalEnergyKernel;
CUfunction pmeEvalDispersionEnergyKernel;
CUfunction pmeConvolutionKernel; CUfunction pmeConvolutionKernel;
CUfunction pmeDispersionConvolutionKernel;
CUfunction pmeInterpolateForceKernel; CUfunction pmeInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines; CUfunction pmeInterpolateDispersionForceKernel;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha; double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int interpolateForceThreads; int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT; int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
...@@ -724,8 +763,10 @@ public: ...@@ -724,8 +763,10 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private: private:
class ForceInfo;
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes); void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
CudaArray* interactionGroupData; CudaArray* interactionGroupData;
...@@ -775,10 +816,12 @@ public: ...@@ -775,10 +816,12 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force); void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private: private:
class ForceInfo;
double prefactor, surfaceAreaFactor, cutoff; double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels; bool hasCreatedKernels;
int maxTiles; int maxTiles;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaArray* params; CudaArray* params;
CudaArray* bornSum; CudaArray* bornSum;
CudaArray* bornRadii; CudaArray* bornRadii;
...@@ -825,10 +868,12 @@ public: ...@@ -825,10 +868,12 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private: private:
class ForceInfo;
double cutoff; double cutoff;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs; bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues; int maxTiles, numComputedValues;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaParameterSet* computedValues; CudaParameterSet* computedValues;
CudaParameterSet* energyDerivs; CudaParameterSet* energyDerivs;
...@@ -882,9 +927,11 @@ public: ...@@ -882,9 +927,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force); void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private: private:
class ForceInfo;
int numParticles; int numParticles;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
const System& system; const System& system;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -926,9 +973,11 @@ public: ...@@ -926,9 +973,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force); void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private: private:
class ForceInfo;
int numDonors, numAcceptors; int numDonors, numAcceptors;
bool hasInitializedKernel; bool hasInitializedKernel;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* donorParams; CudaParameterSet* donorParams;
CudaParameterSet* acceptorParams; CudaParameterSet* acceptorParams;
CudaArray* globals; CudaArray* globals;
...@@ -978,9 +1027,11 @@ public: ...@@ -978,9 +1027,11 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
private: private:
class ForceInfo;
int numGroups, numBonds; int numGroups, numBonds;
bool needEnergyParamDerivs; bool needEnergyParamDerivs;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
CudaArray* groupParticles; CudaArray* groupParticles;
...@@ -1031,8 +1082,10 @@ public: ...@@ -1031,8 +1082,10 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force); void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private: private:
class ForceInfo;
int numBonds; int numBonds;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
...@@ -1077,7 +1130,9 @@ public: ...@@ -1077,7 +1130,9 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force); void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force);
private: private:
class ForceInfo;
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernel; bool hasInitializedKernel;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize; int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
...@@ -1139,9 +1194,11 @@ public: ...@@ -1139,9 +1194,11 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const GayBerneForce& force); void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private: private:
class ForceInfo;
class ReorderListener; class ReorderListener;
void sortAtoms(); void sortAtoms();
CudaContext& cu; CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernels; bool hasInitializedKernels;
int numRealParticles, numExceptions, maxNeighborBlocks; int numRealParticles, numExceptions, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod; GayBerneForce::NonbondedMethod nonbondedMethod;
...@@ -1432,7 +1489,7 @@ private: ...@@ -1432,7 +1489,7 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context); Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes); void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target); void recordGlobalValue(double value, GlobalTarget target, CustomIntegrator& integrator);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step); bool evaluateCondition(int step);
CudaContext& cu; CudaContext& cu;
......
...@@ -71,15 +71,16 @@ public: ...@@ -71,15 +71,16 @@ public:
/** /**
* Add a nonbonded interaction to be evaluated by the default interaction kernel. * Add a nonbonded interaction to be evaluated by the default interaction kernel.
* *
* @param usesCutoff specifies whether a cutoff should be applied to this interaction * @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction * @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param usesExclusions specifies whether this interaction uses exclusions. If this is true, it must have identical exclusions to every other interaction. * @param usesExclusions specifies whether this interaction uses exclusions. If this is true, it must have identical exclusions to every other interaction.
* @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false) * @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded * @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction * @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated * @param forceGroup the force group in which the interaction should be calculated
* @param supportsPairList specifies whether this interaction can work with a neighbor list that uses a separate pair list
*/ */
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup); void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool supportsPairList=false);
/** /**
* Add a per-atom parameter that the default interaction kernel may depend on. * Add a per-atom parameter that the default interaction kernel may depend on.
*/ */
...@@ -189,6 +190,12 @@ public: ...@@ -189,6 +190,12 @@ public:
CudaArray& getInteractingAtoms() { CudaArray& getInteractingAtoms() {
return *interactingAtoms; return *interactingAtoms;
} }
/**
 * Access the device array that stores the single pairs of the neighbor list.
 *
 * @return a reference to the array pointed to by singlePairs
 */
CudaArray& getSinglePairs() {
    CudaArray& pairs = *singlePairs;
    return pairs;
}
/** /**
* Get the array containing exclusion flags. * Get the array containing exclusion flags.
*/ */
...@@ -270,6 +277,8 @@ private: ...@@ -270,6 +277,8 @@ private:
CudaArray* interactingTiles; CudaArray* interactingTiles;
CudaArray* interactingAtoms; CudaArray* interactingAtoms;
CudaArray* interactionCount; CudaArray* interactionCount;
CudaArray* singlePairs;
CudaArray* singlePairCount;
CudaArray* blockCenter; CudaArray* blockCenter;
CudaArray* blockBoundingBox; CudaArray* blockBoundingBox;
CudaArray* sortedBlocks; CudaArray* sortedBlocks;
...@@ -288,8 +297,8 @@ private: ...@@ -288,8 +297,8 @@ private:
std::map<int, double> groupCutoff; std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags; int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
}; };
/** /**
......
...@@ -83,7 +83,7 @@ private: ...@@ -83,7 +83,7 @@ private:
std::vector<Kernel> kernels; std::vector<Kernel> kernels;
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions; std::vector<double> contextNonbondedFractions;
int* tileCounts; int2* interactionCounts;
CudaArray* contextForces; CudaArray* contextForces;
void* pinnedPositionBuffer; void* pinnedPositionBuffer;
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
...@@ -439,6 +439,15 @@ public: ...@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the PME parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class Task; class Task;
CudaPlatform::PlatformData& data; CudaPlatform::PlatformData& data;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include <set> #include <set>
#include <sstream> #include <sstream>
#include <typeinfo> #include <typeinfo>
#include <sys/stat.h>
#include <cudaProfiler.h> #include <cudaProfiler.h>
#ifndef WIN32 #ifndef WIN32
#include <unistd.h> #include <unistd.h>
...@@ -107,7 +108,8 @@ static int executeInWindows(const string &command) { ...@@ -107,7 +108,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0), const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), chargeBuffer(NULL),
integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
// Determine what compiler to use. // Determine what compiler to use.
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
...@@ -127,9 +129,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -127,9 +129,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null"; string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
int res = std::system(testCompilerCommand.c_str()); int res = std::system(testCompilerCommand.c_str());
#endif #endif
isNvccAvailable = (res == 0); struct stat info;
isNvccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0);
int cudaDriverVersion;
cuDriverGetVersion(&cudaDriverVersion);
static bool hasShownNvccWarning = false; static bool hasShownNvccWarning = false;
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning) { if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning && cudaDriverVersion < 8000) {
hasShownNvccWarning = true; hasShownNvccWarning = true;
printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. "); printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. ");
#ifdef WIN32 #ifdef WIN32
...@@ -205,14 +210,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -205,14 +210,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int major, minor; int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device)); CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
#if __CUDA_API_VERSION < 7000 int numThreadBlocksPerComputeUnit = (major >= 6 ? 4 : 6);
if (cudaDriverVersion < 7000) {
// This is a workaround to support GTX 980 with CUDA 6.5. It reports // This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support // its compute capability as 5.2, but the compiler doesn't support
// anything beyond 5.0. // anything beyond 5.0.
if (major == 5) if (major == 5)
minor = 0; minor = 0;
#endif }
#if __CUDA_API_VERSION < 8000 if (cudaDriverVersion < 8000) {
// This is a workaround to support Pascal with CUDA 7.5. It reports // This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support // its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3. // anything beyond 5.3.
...@@ -220,7 +226,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -220,7 +226,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
major = 5; major = 5;
minor = 3; minor = 3;
} }
#endif }
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor; computeCapability = major+0.1*minor;
...@@ -241,7 +247,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -241,7 +247,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize; numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
int multiprocessors; int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device)); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors; numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
if (useDoublePrecision) { if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq"); posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
...@@ -287,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -287,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers"); clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers");
clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers"); clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers");
clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers"); clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers");
setChargesKernel = getKernel(utilities, "setCharges");
// Set defines based on the requested precision. // Set defines based on the requested precision.
...@@ -403,6 +409,8 @@ CudaContext::~CudaContext() { ...@@ -403,6 +409,8 @@ CudaContext::~CudaContext() {
delete energyParamDerivBuffer; delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
delete atomIndexDevice; delete atomIndexDevice;
if (chargeBuffer != NULL)
delete chargeBuffer;
if (integration != NULL) if (integration != NULL)
delete integration; delete integration;
if (expression != NULL) if (expression != NULL)
...@@ -856,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() { ...@@ -856,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() {
} }
} }
/**
 * Copy a set of per-atom charges to the device and run the setCharges kernel.
 *
 * The values are staged in the pinned host buffer (converted to float when
 * double precision is not in use), uploaded into chargeBuffer, and then handed
 * to setChargesKernel together with posq and atomIndexDevice.
 *
 * NOTE(review): chargeBuffer is allocated with numAtoms elements, but the copy
 * loop runs over charges.size() entries; this presumably assumes
 * charges.size() == numAtoms -- confirm against callers.
 *
 * @param charges  the charge of each atom
 */
void CudaContext::setCharges(const vector<double>& charges) {
    // The device-side staging array is created lazily, on the first call only.
    if (chargeBuffer == NULL) {
        chargeBuffer = new CudaArray(*this, numAtoms,
                useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
    }
    const int numCharges = charges.size();
    if (!getUseDoublePrecision()) {
        // Single precision: narrow each value while filling the pinned buffer.
        float* staged = (float*) getPinnedBuffer();
        for (int atom = 0; atom < numCharges; ++atom)
            staged[atom] = (float) charges[atom];
        chargeBuffer->upload(staged);
    }
    else {
        // Double precision: copy the values through unchanged.
        double* staged = (double*) getPinnedBuffer();
        for (int atom = 0; atom < numCharges; ++atom)
            staged[atom] = charges[atom];
        chargeBuffer->upload(staged);
    }
    // Kernel arguments: uploaded charges, posq, the atom index array, and the atom count.
    void* kernelArgs[] = {
        &chargeBuffer->getDevicePointer(),
        &posq->getDevicePointer(),
        &atomIndexDevice->getDevicePointer(),
        &numAtoms
    };
    executeKernel(setChargesKernel, kernelArgs, numAtoms);
}
/** /**
* This class ensures that atom reordering doesn't break virtual sites. * This class ensures that atom reordering doesn't break virtual sites.
*/ */
...@@ -1054,9 +1081,19 @@ void CudaContext::findMoleculeGroups() { ...@@ -1054,9 +1081,19 @@ void CudaContext::findMoleculeGroups() {
} }
void CudaContext::invalidateMolecules() { void CudaContext::invalidateMolecules() {
for (int i = 0; i < forces.size(); i++)
if (invalidateMolecules(forces[i]))
return;
}
bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff()) if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return; return false;
bool valid = true; bool valid = true;
int forceIndex = -1;
for (int i = 0; i < forces.size(); i++)
if (forces[i] == force)
forceIndex = i;
for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) { for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
MoleculeGroup& mol = moleculeGroups[group]; MoleculeGroup& mol = moleculeGroups[group];
vector<int>& instances = mol.instances; vector<int>& instances = mol.instances;
...@@ -1071,22 +1108,21 @@ void CudaContext::invalidateMolecules() { ...@@ -1071,22 +1108,21 @@ void CudaContext::invalidateMolecules() {
Molecule& m2 = molecules[instances[j]]; Molecule& m2 = molecules[instances[j]];
int offset2 = offsets[j]; int offset2 = offsets[j];
for (int i = 0; i < (int) atoms.size() && valid; i++) { for (int i = 0; i < (int) atoms.size() && valid; i++) {
for (int k = 0; k < (int) forces.size(); k++) if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
if (!forces[k]->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2)) valid = false;
valid = false;
} }
// See if the force groups are identical. // See if the force groups are identical.
for (int i = 0; i < (int) forces.size() && valid; i++) { if (valid && forceIndex > -1) {
for (int k = 0; k < (int) m1.groups[i].size() && valid; k++) for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
if (!forces[i]->areGroupsIdentical(m1.groups[i][k], m2.groups[i][k])) if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
valid = false; valid = false;
} }
} }
} }
if (valid) if (valid)
return; return false;
// The list of which molecules are identical is no longer valid. We need to restore the // The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them // atoms to their original order, rebuild the list of identical molecules, and sort them
...@@ -1154,6 +1190,7 @@ void CudaContext::invalidateMolecules() { ...@@ -1154,6 +1190,7 @@ void CudaContext::invalidateMolecules() {
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
reorderAtoms(); reorderAtoms();
return true;
} }
void CudaContext::reorderAtoms() { void CudaContext::reorderAtoms() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment