Commit a381a3ab authored by peastman's avatar peastman
Browse files

Merge branch 'master' into gayberne

parents 5ecc8e00 1f7866ad
......@@ -314,8 +314,9 @@ private:
CustomNonbondedForce* forceCopy;
std::map<std::string, double> globalParamValues;
std::vector<std::set<int> > exclusions;
std::vector<std::string> parameterNames, globalParameterNames;
std::vector<std::string> parameterNames, globalParameterNames, energyParamDerivNames;
std::vector<std::pair<std::set<int>, std::set<int> > > interactionGroups;
std::vector<double> longRangeCoefficientDerivs;
NonbondedMethod nonbondedMethod;
CpuCustomNonbondedForce* nonbonded;
};
......@@ -398,7 +399,7 @@ private:
RealOpenMM nonbondedCutoff;
CpuCustomGBForce* ixn;
std::vector<std::set<int> > exclusions;
std::vector<std::string> particleParameterNames, globalParameterNames, valueNames;
std::vector<std::string> particleParameterNames, globalParameterNames, energyParamDerivNames, valueNames;
std::vector<OpenMM::CustomGBForce::ComputationType> valueTypes;
std::vector<OpenMM::CustomGBForce::ComputationType> energyTypes;
NonbondedMethod nonbondedMethod;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014 Stanford University and the Authors. *
* Portions copyright (c) 2014-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -196,7 +196,7 @@ void CpuBondForce::calculateForce(vector<RealVec>& atomCoordinates, RealOpenMM**
for (int i = 0; i < extraBonds.size(); i++) {
int bond = extraBonds[i];
referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy);
referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy, NULL);
}
// Compute the total energy.
......@@ -212,6 +212,6 @@ void CpuBondForce::threadComputeForce(ThreadPool& threads, int threadIndex, vect
int numBonds = bonds.size();
for (int i = 0; i < numBonds; i++) {
int bond = bonds[i];
referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy);
referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy, NULL);
}
}
\ No newline at end of file
......@@ -47,13 +47,16 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
const vector<Lepton::CompiledExpression>& valueExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueParamDerivExpressions,
const vector<string>& valueNames,
const vector<Lepton::CompiledExpression>& energyExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyParamDerivExpressions,
const vector<string>& parameterNames) :
valueExpressions(valueExpressions), valueDerivExpressions(valueDerivExpressions), valueGradientExpressions(valueGradientExpressions),
energyExpressions(energyExpressions), energyDerivExpressions(energyDerivExpressions), energyGradientExpressions(energyGradientExpressions) {
valueParamDerivExpressions(valueParamDerivExpressions), energyExpressions(energyExpressions), energyDerivExpressions(energyDerivExpressions),
energyGradientExpressions(energyGradientExpressions), energyParamDerivExpressions(energyParamDerivExpressions) {
firstAtom = (threadIndex*(long long) numAtoms)/numThreads;
lastAtom = ((threadIndex+1)*(long long) numAtoms)/numThreads;
for (int i = 0; i < (int) valueExpressions.size(); i++)
......@@ -64,6 +67,9 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
for (int i = 0; i < (int) valueGradientExpressions.size(); i++)
for (int j = 0; j < (int) valueGradientExpressions[i].size(); j++)
expressionSet.registerExpression(this->valueGradientExpressions[i][j]);
for (int i = 0; i < (int) valueParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) valueParamDerivExpressions[i].size(); j++)
expressionSet.registerExpression(this->valueParamDerivExpressions[i][j]);
for (int i = 0; i < (int) energyExpressions.size(); i++)
expressionSet.registerExpression(this->energyExpressions[i]);
for (int i = 0; i < (int) energyDerivExpressions.size(); i++)
......@@ -72,6 +78,9 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
for (int i = 0; i < (int) energyGradientExpressions.size(); i++)
for (int j = 0; j < (int) energyGradientExpressions[i].size(); j++)
expressionSet.registerExpression(this->energyGradientExpressions[i][j]);
for (int i = 0; i < (int) energyParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) energyParamDerivExpressions[i].size(); j++)
expressionSet.registerExpression(this->energyParamDerivExpressions[i][j]);
xindex = expressionSet.getVariableIndex("x");
yindex = expressionSet.getVariableIndex("y");
zindex = expressionSet.getVariableIndex("z");
......@@ -101,30 +110,37 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
dVdZ.resize(valueDerivExpressions.size());
dVdR1.resize(valueDerivExpressions.size());
dVdR2.resize(valueDerivExpressions.size());
dValue0dParam.resize(valueParamDerivExpressions[0].size(), vector<float>(numAtoms));
energyParamDerivs.resize(valueParamDerivExpressions[0].size());
}
CpuCustomGBForce::CpuCustomGBForce(int numAtoms, const std::vector<std::set<int> >& exclusions,
const vector<Lepton::CompiledExpression>& valueExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueParamDerivExpressions,
const vector<string>& valueNames,
const vector<CustomGBForce::ComputationType>& valueTypes,
const vector<Lepton::CompiledExpression>& energyExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyParamDerivExpressions,
const vector<CustomGBForce::ComputationType>& energyTypes,
const vector<string>& parameterNames, ThreadPool& threads) :
exclusions(exclusions), cutoff(false), periodic(false), valueNames(valueNames), valueTypes(valueTypes),
energyTypes(energyTypes), paramNames(parameterNames), threads(threads) {
exclusions(exclusions), cutoff(false), periodic(false), valueTypes(valueTypes), energyTypes(energyTypes), numValues(valueNames.size()),
numParams(parameterNames.size()), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(numAtoms, threads.getNumThreads(), i, valueExpressions, valueDerivExpressions, valueGradientExpressions, valueNames,
energyExpressions, energyDerivExpressions, energyGradientExpressions, parameterNames));
values.resize(valueNames.size());
dEdV.resize(valueNames.size());
threadData.push_back(new ThreadData(numAtoms, threads.getNumThreads(), i, valueExpressions, valueDerivExpressions, valueGradientExpressions,
valueParamDerivExpressions, valueNames, energyExpressions, energyDerivExpressions, energyGradientExpressions, energyParamDerivExpressions, parameterNames));
values.resize(numValues);
dEdV.resize(numValues);
for (int i = 0; i < (int) values.size(); i++) {
values[i].resize(numAtoms);
dEdV[i].resize(numAtoms);
}
dValuedParam.resize(numValues);
for (int i = 0; i < numValues; i++)
dValuedParam[i].resize(valueParamDerivExpressions[0].size(), vector<float>(numAtoms));
}
CpuCustomGBForce::~CpuCustomGBForce() {
......@@ -153,7 +169,7 @@ void CpuCustomGBForce::setPeriodic(RealVec& boxSize) {
void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM** atomParameters,
map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
bool includeForce, bool includeEnergy, double& totalEnergy) {
bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
......@@ -173,12 +189,20 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM**
gmx_atomic_set(&counter, 0);
threads.execute(task);
threads.waitForThreads();
// Sum derivatives of the first computed value with respect to global parameters.
bool hasParamDerivs = (threadData[0]->dValue0dParam.size() > 0);
if (hasParamDerivs) {
threads.resumeThreads();
threads.waitForThreads();
}
// Calculate the remaining computed values.
threads.resumeThreads();
threads.waitForThreads();
// Calculate the energy terms.
for (int i = 0; i < (int) threadData[0]->energyExpressions.size(); i++) {
......@@ -205,6 +229,10 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM**
for (int i = 0; i < numThreads; i++)
totalEnergy += threadEnergy[i];
}
if (hasParamDerivs)
for (int i = 0; i < threads.getNumThreads(); i++)
for (int j = 0; j < threadData[i]->energyParamDerivs.size(); j++)
energyParamDerivs[j] += threadData[i]->energyParamDerivs[j];
}
void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
......@@ -224,11 +252,28 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
for (int i = 0; i < (int) data.value0.size(); i++)
data.value0[i] = 0.0f;
for (int i = 0; i < (int) data.dValue0dParam.size(); i++)
for (int j = 0; j < (int) data.dValue0dParam[i].size(); j++)
data.dValue0dParam[i][j] = 0.0;
if (valueTypes[0] == CustomGBForce::ParticlePair)
calculateParticlePairValue(0, data, numberOfAtoms, posq, atomParameters, true, boxSize, invBoxSize);
else
calculateParticlePairValue(0, data, numberOfAtoms, posq, atomParameters, false, boxSize, invBoxSize);
threads.syncThreads();
// Sum derivatives of the first computed value with respect to global parameters.
bool hasParamDerivs = (data.dValue0dParam.size() > 0);
if (hasParamDerivs) {
for (int j = 0; j < data.dValue0dParam.size(); j++)
for (int k = data.firstAtom; k < data.lastAtom; k++) {
float sum = 0.0f;
for (int m = 0; m < threadData.size(); m++)
sum += threadData[m]->dValue0dParam[j][k];
dValuedParam[0][j][k] = sum;
}
threads.syncThreads();
}
// Sum the first computed value and calculate the remaining ones.
......@@ -241,11 +286,23 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
data.expressionSet.setVariable(data.xindex, posq[4*atom]);
data.expressionSet.setVariable(data.yindex, posq[4*atom+1]);
data.expressionSet.setVariable(data.zindex, posq[4*atom+2]);
for (int j = 0; j < (int) paramNames.size(); j++)
for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[atom][j]);
for (int i = 1; i < numValues; i++) {
data.expressionSet.setVariable(data.valueIndex[i-1], values[i-1][atom]);
values[i][atom] = (float) data.valueExpressions[i].evaluate();
// Calculate derivatives with respect to parameters.
if (hasParamDerivs) {
for (int j = 0; j < data.valueParamDerivExpressions[i].size(); j++)
dValuedParam[i][j][atom] = data.valueParamDerivExpressions[i][j].evaluate();
for (int j = 0; j < i; j++) {
float dVdV = data.valueDerivExpressions[i][j].evaluate();
for (int k = 0; k < data.valueParamDerivExpressions[i].size(); k++)
dValuedParam[i][k][atom] += dVdV*dValuedParam[j][k][atom];
}
}
}
}
threads.syncThreads();
......@@ -254,7 +311,9 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
for (int i = 0; i < (int) data.dEdV.size(); i++)
for (int j = 0; j < (int) data.dEdV[i].size(); j++)
data.dEdV[i][j] = 0.0;
data.dEdV[i][j] = 0.0f;
for (int i = 0; i < (int) data.energyParamDerivs.size(); i++)
data.energyParamDerivs[i] = 0.0f;
for (int termIndex = 0; termIndex < (int) data.energyExpressions.size(); termIndex++) {
if (energyTypes[termIndex] == CustomGBForce::SingleParticle)
calculateSingleParticleEnergyTerm(termIndex, data, numberOfAtoms, posq, atomParameters, forces, energy);
......@@ -339,7 +398,7 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
if (cutoff && r2 >= cutoffDistance2)
return;
float r = sqrtf(r2);
for (int i = 0; i < (int) paramNames.size(); i++) {
for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
}
......@@ -349,6 +408,11 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]);
}
valueArray[atom1] += (float) data.valueExpressions[index].evaluate();
// Calculate derivatives with respect to parameters.
for (int i = 0; i < data.valueParamDerivExpressions[index].size(); i++)
data.dValue0dParam[i][atom1] += data.valueParamDerivExpressions[index][i].evaluate();
}
void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq,
......@@ -357,17 +421,22 @@ void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData&
data.expressionSet.setVariable(data.xindex, posq[4*i]);
data.expressionSet.setVariable(data.yindex, posq[4*i+1]);
data.expressionSet.setVariable(data.zindex, posq[4*i+2]);
for (int j = 0; j < (int) paramNames.size(); j++)
for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]);
for (int j = 0; j < (int) valueNames.size(); j++)
for (int j = 0; j < (int) values.size(); j++)
data.expressionSet.setVariable(data.valueIndex[j], values[j][i]);
if (includeEnergy)
totalEnergy += (float) data.energyExpressions[index].evaluate();
for (int j = 0; j < (int) valueNames.size(); j++)
for (int j = 0; j < (int) values.size(); j++)
data.dEdV[j][i] += (float) data.energyDerivExpressions[index][j].evaluate();
forces[4*i+0] -= (float) data.energyGradientExpressions[index][0].evaluate();
forces[4*i+1] -= (float) data.energyGradientExpressions[index][1].evaluate();
forces[4*i+2] -= (float) data.energyGradientExpressions[index][2].evaluate();
// Compute derivatives with respect to parameters.
for (int k = 0; k < data.energyParamDerivExpressions[index].size(); k++)
data.energyParamDerivs[k] += data.energyParamDerivExpressions[index][k].evaluate();
}
}
......@@ -428,12 +497,12 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom
// Record variables for evaluating expressions.
for (int i = 0; i < (int) paramNames.size(); i++) {
for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
}
data.expressionSet.setVariable(data.rindex, r);
for (int i = 0; i < (int) valueNames.size(); i++) {
for (int i = 0; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.particleValueIndex[i*2], values[i][atom1]);
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]);
}
......@@ -447,10 +516,15 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*atom1)-result).store(forces+4*atom1);
(fvec4(forces+4*atom2)+result).store(forces+4*atom2);
for (int i = 0; i < (int) valueNames.size(); i++) {
for (int i = 0; i < (int) values.size(); i++) {
data.dEdV[i][atom1] += (float) data.energyDerivExpressions[index][2*i+1].evaluate();
data.dEdV[i][atom2] += (float) data.energyDerivExpressions[index][2*i+2].evaluate();
}
// Compute derivatives with respect to parameters.
for (int i = 0; i < data.energyParamDerivExpressions[index].size(); i++)
data.energyParamDerivs[i] += data.energyParamDerivExpressions[index][i].evaluate();
}
void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, float* posq, RealOpenMM** atomParameters,
......@@ -500,9 +574,9 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
data.expressionSet.setVariable(data.xindex, posq[4*i]);
data.expressionSet.setVariable(data.yindex, posq[4*i+1]);
data.expressionSet.setVariable(data.zindex, posq[4*i+2]);
for (int j = 0; j < (int) paramNames.size(); j++)
for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]);
for (int j = 1; j < (int) valueNames.size(); j++) {
for (int j = 1; j < (int) values.size(); j++) {
data.expressionSet.setVariable(data.valueIndex[j-1], values[j-1][i]);
data.dVdX[j] = 0.0;
data.dVdY[j] = 0.0;
......@@ -521,6 +595,13 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
forces[4*i+2] -= dEdV[j][i]*data.dVdZ[j];
}
}
// Compute chain rule terms for derivatives with respect to parameters.
for (int i = data.firstAtom; i < data.lastAtom; i++)
for (int j = 0; j < data.valueIndex.size(); j++)
for (int k = 0; k < dValuedParam[j].size(); k++)
data.energyParamDerivs[k] += dEdV[j][i]*dValuedParam[j][k][i];
}
void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadData& data, float* posq, RealOpenMM** atomParameters,
......@@ -538,7 +619,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
// Record variables for evaluating expressions.
for (int i = 0; i < (int) paramNames.size(); i++) {
for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
data.expressionSet.setVariable(data.paramIndex[i], atomParameters[atom1][i]);
......@@ -562,7 +643,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
f1 -= deltaR*(dEdV[0][atom1]*data.dVdR1[0]);
f2 -= deltaR*(dEdV[0][atom1]*data.dVdR2[0]);
}
for (int i = 1; i < (int) valueNames.size(); i++) {
for (int i = 1; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.valueIndex[i], values[i][atom1]);
data.dVdR1[i] = 0.0;
data.dVdR2[i] = 0.0;
......
......@@ -43,25 +43,30 @@ public:
CpuCustomNonbondedForce& owner;
};
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames) :
energyExpression(energyExpression), forceExpression(forceExpression) {
energyR = ReferenceForce::getVariablePointer(this->energyExpression, "r");
forceR = ReferenceForce::getVariablePointer(this->forceExpression, "r");
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
expressionSet.registerExpression(this->energyExpression);
expressionSet.registerExpression(this->forceExpression);
for (int i = 0; i < this->energyParamDerivExpressions.size(); i++)
expressionSet.registerExpression(this->energyParamDerivExpressions[i]);
rIndex = expressionSet.getVariableIndex("r");
for (int i = 0; i < (int) parameterNames.size(); i++) {
for (int j = 1; j < 3; j++) {
stringstream name;
name << parameterNames[i] << j;
energyParticleParams.push_back(ReferenceForce::getVariablePointer(this->energyExpression, name.str()));
forceParticleParams.push_back(ReferenceForce::getVariablePointer(this->forceExpression, name.str()));
particleParamIndex.push_back(expressionSet.getVariableIndex(name.str()));
}
}
energyParamDerivs.resize(energyParamDerivExpressions.size());
}
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,ThreadPool& threads) :
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) :
cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames));
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions));
}
CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
......@@ -120,7 +125,7 @@ void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates, RealOpenMM** atomParameters,
RealOpenMM* fixedParameters, const map<string, double>& globalParameters,
vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy) {
vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
......@@ -144,11 +149,18 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
// Combine the energies from all the threads.
int numThreads = threads.getNumThreads();
if (includeEnergy) {
int numThreads = threads.getNumThreads();
for (int i = 0; i < numThreads; i++)
totalEnergy += threadEnergy[i];
}
// Combine the energy derivatives from all threads.
int numDerivs = threadData[0]->energyParamDerivs.size();
for (int i = 0; i < numThreads; i++)
for (int j = 0; j < numDerivs; j++)
energyParamDerivs[j] += threadData[i]->energyParamDerivs[j];
}
void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
......@@ -159,10 +171,10 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
double& energy = threadEnergy[threadIndex];
float* forces = &(*threadForce)[threadIndex][0];
ThreadData& data = *threadData[threadIndex];
for (map<string, double>::const_iterator iter = globalParameters->begin(); iter != globalParameters->end(); ++iter) {
ReferenceForce::setVariable(ReferenceForce::getVariablePointer(data.energyExpression, iter->first), iter->second);
ReferenceForce::setVariable(ReferenceForce::getVariablePointer(data.forceExpression, iter->first), iter->second);
}
for (map<string, double>::const_iterator iter = globalParameters->begin(); iter != globalParameters->end(); ++iter)
data.expressionSet.setVariable(data.expressionSet.getVariableIndex(iter->first), iter->second);
for (int i = 0; i < data.energyParamDerivs.size(); i++)
data.energyParamDerivs[i] = 0.0;
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (groupInteractions.size() > 0) {
......@@ -175,10 +187,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
int atom1 = groupInteractions[i].first;
int atom2 = groupInteractions[i].second;
for (int j = 0; j < (int) paramNames.size(); j++) {
ReferenceForce::setVariable(data.energyParticleParams[j*2], atomParameters[atom1][j]);
ReferenceForce::setVariable(data.energyParticleParams[j*2+1], atomParameters[atom2][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2], atomParameters[atom1][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2+1], atomParameters[atom2][j]);
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[atom1][j]);
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[atom2][j]);
}
calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize);
}
......@@ -196,17 +206,13 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int j = 0; j < (int) paramNames.size(); j++) {
ReferenceForce::setVariable(data.energyParticleParams[j*2], atomParameters[first][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2], atomParameters[first][j]);
}
for (int j = 0; j < (int) paramNames.size(); j++)
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[first][j]);
for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) {
int second = blockAtom[k];
for (int j = 0; j < (int) paramNames.size(); j++) {
ReferenceForce::setVariable(data.energyParticleParams[j*2+1], atomParameters[second][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2+1], atomParameters[second][j]);
}
for (int j = 0; j < (int) paramNames.size(); j++)
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[second][j]);
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize);
}
}
......@@ -223,10 +229,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
for (int jj = ii+1; jj < numberOfAtoms; jj++) {
if (exclusions[jj].find(ii) == exclusions[jj].end()) {
for (int j = 0; j < (int) paramNames.size(); j++) {
ReferenceForce::setVariable(data.energyParticleParams[j*2], atomParameters[ii][j]);
ReferenceForce::setVariable(data.energyParticleParams[j*2+1], atomParameters[jj][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2], atomParameters[ii][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2+1], atomParameters[jj][j]);
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[ii][j]);
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[jj][j]);
}
calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize);
}
......@@ -250,15 +254,15 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
// accumulate forces
ReferenceForce::setVariable(data.energyR, r);
ReferenceForce::setVariable(data.forceR, r);
data.expressionSet.setVariable(data.rIndex, r);
double dEdR = (includeForce ? data.forceExpression.evaluate()/r : 0.0);
double energy = (includeEnergy ? data.energyExpression.evaluate() : 0.0);
double switchValue = 1.0;
if (useSwitch) {
if (r > switchingDistance) {
RealOpenMM t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
RealOpenMM switchValue = 1+t*t*t*(-10+t*(15-t*6));
RealOpenMM switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
double t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
switchValue = 1+t*t*t*(-10+t*(15-t*6));
double switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
dEdR = switchValue*dEdR + energy*switchDeriv/r;
energy *= switchValue;
}
......@@ -270,6 +274,11 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
// accumulate energies
totalEnergy += energy;
// Accumulate energy derivatives.
for (int i = 0; i < data.energyParamDerivExpressions.size(); i++)
data.energyParamDerivs[i] += switchValue*data.energyParamDerivExpressions[i].evaluate();
}
void CpuCustomNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, const fvec4& boxSize, const fvec4& invBoxSize) const {
......
......@@ -85,6 +85,11 @@ static ReferenceConstraints& extractConstraints(ContextImpl& context) {
return *(ReferenceConstraints*) data->constraints;
}
static map<string, double>& extractEnergyParameterDerivatives(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((map<string, double>*) data->energyParameterDerivatives);
}
/**
* Make sure an expression doesn't use any undefined variables.
*/
......@@ -814,6 +819,12 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
globalParameterNames.push_back(force.getGlobalParameterName(i));
globalParamValues[force.getGlobalParameterName(i)] = force.getGlobalParameterDefaultValue(i);
}
std::vector<Lepton::CompiledExpression> energyParamDerivExpressions;
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string param = force.getEnergyParameterDerivativeName(i);
energyParamDerivNames.push_back(param);
energyParamDerivExpressions.push_back(expression.differentiate(param).createCompiledExpression());
}
set<string> variables;
variables.insert("r");
for (int i = 0; i < numParameters; i++) {
......@@ -847,7 +858,7 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
interactionGroups.push_back(make_pair(set1, set2));
}
data.isPeriodic = (nonbondedMethod == CutoffPeriodic);
nonbonded = new CpuCustomNonbondedForce(energyExpression, forceExpression, parameterNames, exclusions, data.threads);
nonbonded = new CpuCustomNonbondedForce(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions, data.threads);
if (interactionGroups.size() > 0)
nonbonded->setInteractionGroups(interactionGroups);
}
......@@ -875,15 +886,22 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
}
if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance);
nonbonded->calculatePairIxn(numParticles, &data.posq[0], posData, particleParamArray, 0, globalParamValues, data.threadForce, includeForces, includeEnergy, energy);
vector<double> energyParamDerivValues(energyParamDerivNames.size()+1, 0.0);
nonbonded->calculatePairIxn(numParticles, &data.posq[0], posData, particleParamArray, 0, globalParamValues, data.threadForce, includeForces, includeEnergy, energy, &energyParamDerivValues[0]);
map<string, double>& energyParamDerivs = extractEnergyParameterDerivatives(context);
for (int i = 0; i < energyParamDerivNames.size(); i++)
energyParamDerivs[energyParamDerivNames[i]] += energyParamDerivValues[i];
// Add in the long range correction.
if (!hasInitializedLongRangeCorrection || (globalParamsChanged && forceCopy != NULL)) {
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner());
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
}
energy += longRangeCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
double volume = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
energy += longRangeCoefficient/volume;
for (int i = 0; i < longRangeCoefficientDerivs.size(); i++)
energyParamDerivs[energyParamDerivNames[i]] += longRangeCoefficientDerivs[i]/volume;
return energy;
}
......@@ -905,7 +923,7 @@ void CpuCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& con
// If necessary, recompute the long range correction.
if (forceCopy != NULL) {
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(force, context.getOwner());
CustomNonbondedForceImpl::calcLongRangeCorrection(force, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
*forceCopy = force;
}
......@@ -1027,6 +1045,7 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
vector<vector<Lepton::CompiledExpression> > valueDerivExpressions(force.getNumComputedValues());
vector<vector<Lepton::CompiledExpression> > valueGradientExpressions(force.getNumComputedValues());
vector<vector<Lepton::CompiledExpression> > valueParamDerivExpressions(force.getNumComputedValues());
vector<Lepton::CompiledExpression> valueExpressions;
vector<Lepton::CompiledExpression> energyExpressions;
set<string> particleVariables, pairVariables;
......@@ -1061,6 +1080,11 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
valueDerivExpressions[i].push_back(ex.differentiate(valueNames[j]).createCompiledExpression());
validateVariables(ex.getRootNode(), particleVariables);
}
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++) {
string param = force.getEnergyParameterDerivativeName(j);
energyParamDerivNames.push_back(param);
valueParamDerivExpressions[i].push_back(ex.differentiate(param).createCompiledExpression());
}
particleVariables.insert(name);
pairVariables.insert(name+"1");
pairVariables.insert(name+"2");
......@@ -1070,6 +1094,7 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
vector<vector<Lepton::CompiledExpression> > energyDerivExpressions(force.getNumEnergyTerms());
vector<vector<Lepton::CompiledExpression> > energyGradientExpressions(force.getNumEnergyTerms());
vector<vector<Lepton::CompiledExpression> > energyParamDerivExpressions(force.getNumEnergyTerms());
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
......@@ -1093,14 +1118,17 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
validateVariables(ex.getRootNode(), pairVariables);
}
}
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).createCompiledExpression());
}
// Delete the custom functions.
for (map<string, Lepton::CustomFunction*>::iterator iter = functions.begin(); iter != functions.end(); iter++)
delete iter->second;
ixn = new CpuCustomGBForce(numParticles, exclusions, valueExpressions, valueDerivExpressions, valueGradientExpressions, valueNames, valueTypes, energyExpressions,
energyDerivExpressions, energyGradientExpressions, energyTypes, particleParameterNames, data.threads);
ixn = new CpuCustomGBForce(numParticles, exclusions, valueExpressions, valueDerivExpressions, valueGradientExpressions, valueParamDerivExpressions,
valueNames, valueTypes, energyExpressions, energyDerivExpressions, energyGradientExpressions, energyParamDerivExpressions, energyTypes,
particleParameterNames, data.threads);
data.isPeriodic = (force.getNonbondedMethod() == CustomGBForce::CutoffPeriodic);
}
......@@ -1117,7 +1145,11 @@ double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFor
map<string, double> globalParameters;
for (int i = 0; i < (int) globalParameterNames.size(); i++)
globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]);
ixn->calculateIxn(numParticles, &data.posq[0], particleParamArray, globalParameters, data.threadForce, includeForces, includeEnergy, energy);
vector<double> energyParamDerivValues(energyParamDerivNames.size()+1, 0.0);
ixn->calculateIxn(numParticles, &data.posq[0], particleParamArray, globalParameters, data.threadForce, includeForces, includeEnergy, energy, &energyParamDerivValues[0]);
map<string, double>& energyParamDerivs = extractEnergyParameterDerivatives(context);
for (int i = 0; i < energyParamDerivNames.size(); i++)
energyParamDerivs[energyParamDerivNames[i]] += energyParamDerivValues[i];
return energy;
}
......
......@@ -386,13 +386,15 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float inverseR = 1/r;
float chargeProdOverR = scaledChargeI*posq[4*j+3]*inverseR;
float dEdR = chargeProdOverR*inverseR*inverseR;
dEdR = dEdR * (erfAlphaR-(float)TWO_OVER_SQRT_PI*alphaR*(float)exp(-alphaR*alphaR));
dEdR = dEdR * (erfAlphaR-TWO_OVER_SQRT_PI*alphaR*(float)exp(-alphaR*alphaR));
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
if (includeEnergy)
threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
}
else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
}
}
}
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -100,6 +100,15 @@ public:
* refer to it by this name.
*/
std::string addArgument(CUdeviceptr data, const std::string& type);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/**
* Add some Cuda code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
......@@ -129,6 +138,7 @@ private:
std::vector<std::string> argTypes;
std::vector<std::vector<CudaArray*> > atomIndices;
std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives;
std::vector<void*> kernelArgs;
int numForceBuffers, maxBonds, allGroups;
bool hasInitializedKernels, hasInteractions;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -177,6 +177,12 @@ public:
CudaArray& getEnergyBuffer() {
return *energyBuffer;
}
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
CudaArray& getEnergyParamDerivBuffer() {
return *energyParamDerivBuffer;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
......@@ -326,6 +332,18 @@ public:
void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool getForcesValid() const {
return forcesValid;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void setForcesValid(bool valid) {
forcesValid = valid;
}
/**
* Get the number of atoms.
*/
......@@ -532,6 +550,27 @@ public:
std::vector<ForcePostComputation*>& getPostComputations() {
return postComputations;
}
/**
* Get the names of all parameters with respect to which energy derivatives are computed.
*/
const std::vector<std::string>& getEnergyParamDerivNames() const {
return energyParamDerivNames;
}
/**
* Get a workspace data structure used for accumulating the values of derivatives of the energy
* with respect to parameters.
*/
std::map<std::string, double>& getEnergyParamDerivWorkspace() {
return energyParamDerivWorkspace;
}
/**
* Register that the derivative of potential energy with respect to a context parameter
* will need to be calculated. If this is called multiple times for a single parameter,
* it is only added to the list once.
*
* @param param the name of the parameter to add
*/
void addEnergyParameterDerivative(const std::string& param);
/**
* Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions
......@@ -572,7 +611,7 @@ private:
int paddedNumAtoms;
int numAtomBlocks;
int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, forcesValid;
std::string compiler, tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
......@@ -597,7 +636,10 @@ private:
CudaArray* velm;
CudaArray* force;
CudaArray* energyBuffer;
CudaArray* energyParamDerivBuffer;
CudaArray* atomIndexDevice;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex;
std::vector<CUdeviceptr> autoclearBuffers;
std::vector<int> autoclearBufferSizes;
......
......@@ -163,6 +163,12 @@ public:
* @param forces on exit, this contains the forces
*/
void getForces(ContextImpl& context, std::vector<Vec3>& forces);
/**
* Get the current derivatives of the energy with respect to context parameters.
*
* @param derivs on exit, this contains the derivatives
*/
void getEnergyParameterDerivatives(ContextImpl& context, std::map<std::string, double>& derivs);
/**
* Get the current periodic box vectors.
*
......@@ -729,6 +735,7 @@ private:
std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions;
double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel;
int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy;
......@@ -819,13 +826,15 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
double cutoff;
bool hasInitializedKernels, needParameterGradient;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues;
CudaContext& cu;
CudaParameterSet* params;
CudaParameterSet* computedValues;
CudaParameterSet* energyDerivs;
CudaParameterSet* energyDerivChain;
std::vector<CudaParameterSet*> dValuedParam;
std::vector<CudaArray*> dValue0dParam;
CudaArray* longEnergyDerivs;
CudaArray* globals;
CudaArray* valueBuffers;
......@@ -970,6 +979,7 @@ public:
private:
int numGroups, numBonds;
bool needEnergyParamDerivs;
CudaContext& cu;
CudaParameterSet* params;
CudaArray* globals;
......@@ -1284,7 +1294,7 @@ public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
randomSeed(NULL), perDofValues(NULL) {
randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
}
~CudaIntegrateCustomStepKernel();
/**
......@@ -1349,8 +1359,11 @@ public:
private:
class ReorderListener;
class GlobalTarget;
class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step);
......@@ -1358,18 +1371,23 @@ private:
double energy;
float energyFloat;
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent;
CudaArray* globalValues;
CudaArray* sumBuffer;
CudaArray* summedValue;
CudaArray* uniformRandoms;
CudaArray* randomSeed;
CudaArray* perDofEnergyParamDerivs;
std::map<int, CudaArray*> savedForces;
std::set<int> validSavedForces;
CudaParameterSet* perDofValues;
mutable std::vector<std::vector<float> > localPerDofValuesFloat;
mutable std::vector<std::vector<double> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<float> localPerDofEnergyParamDerivsFloat;
std::vector<double> localPerDofEnergyParamDerivsDouble;
std::vector<float> globalValuesFloat;
std::vector<double> globalValuesDouble;
std::vector<double> initialGlobalVariables;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -88,6 +88,15 @@ public:
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/
void addArgument(const ParameterInfo& parameter);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/**
* Specify the list of exclusions that an interaction outside the default kernel will depend on.
*
......@@ -269,10 +278,13 @@ private:
CudaArray* oldPositions;
CudaArray* rebuildNeighborList;
CudaSort* blockSorter;
CUevent downloadCountEvent;
int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments;
std::vector<std::string> energyParameterDerivatives;
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -52,12 +52,25 @@ void CudaBondedUtilities::addInteraction(const vector<vector<int> >& atoms, cons
}
}
std::string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
arguments.push_back(data);
argTypes.push_back(type);
return "customArg"+context.intToString(arguments.size());
}
string CudaBondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void CudaBondedUtilities::addPrefixCode(const string& source) {
for (int i = 0; i < (int) prefixCode.size(); i++)
if (prefixCode[i] == source)
......@@ -109,11 +122,21 @@ void CudaBondedUtilities::initialize(const System& system) {
}
for (int i = 0; i < (int) arguments.size(); i++)
s<<", "<<argTypes[i]<<"* customArg"<<(i+1);
if (energyParameterDerivatives.size() > 0)
s<<", mixed* __restrict__ energyParamDerivs";
s<<") {\n";
s<<"mixed energy = 0;\n";
for (int i = 0; i < energyParameterDerivatives.size(); i++)
s<<"mixed energyParamDeriv"<<i<<" = 0;\n";
for (int force = 0; force < numForces; force++)
s<<createForceSource(force, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
s<<"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;\n";
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
s<<"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
s<<"}\n";
map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
......@@ -171,6 +194,8 @@ void CudaBondedUtilities::computeInteractions(int groups) {
kernelArgs.push_back(&atomIndices[i][j]->getDevicePointer());
for (int i = 0; i < (int) arguments.size(); i++)
kernelArgs.push_back(&arguments[i]);
if (energyParameterDerivatives.size() > 0)
kernelArgs.push_back(&context.getEnergyParamDerivBuffer().getDevicePointer());
}
if (!hasInteractions)
return;
......
......@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) {
try {
......@@ -339,6 +339,8 @@ CudaContext::~CudaContext() {
delete force;
if (energyBuffer != NULL)
delete energyBuffer;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (integration != NULL)
......@@ -390,6 +392,14 @@ void CudaContext::initialize() {
force = CudaArray::create<long long>(*this, paddedNumAtoms*3, "force");
addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize());
addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize());
int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = CudaArray::create<double>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
else
energyParamDerivBuffer = CudaArray::create<float>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer);
}
atomIndexDevice = CudaArray::create<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
......@@ -1131,7 +1141,6 @@ void CudaContext::reorderAtoms() {
reorderAtomsImpl<float, float4, double, double4>();
else
reorderAtomsImpl<float, float4, float, float4>();
nonbonded->updateNeighborListSize();
}
template <class Real, class Real4, class Mixed, class Mixed4>
......@@ -1312,6 +1321,15 @@ void CudaContext::addPostComputation(ForcePostComputation* computation) {
postComputations.push_back(computation);
}
void CudaContext::addEnergyParameterDerivative(const string& param) {
// See if this parameter has already been registered.
for (int i = 0; i < energyParamDerivNames.size(); i++)
if (param == energyParamDerivNames[i])
return;
energyParamDerivNames.push_back(param);
}
struct CudaContext::WorkThread::ThreadData {
ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
......@@ -174,8 +174,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
out << "float4 c[4];\n";
for (int j = 0; j < 4; j++)
......@@ -217,9 +217,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << ");\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << "-1);\n";
out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
out << "float4 c[16];\n";
for (int j = 0; j < 16; j++)
......
This diff is collapsed.
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -65,12 +65,14 @@ private:
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), pinnedCountBuffer(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
// Decide how many thread blocks to use.
string errorMessage = "Error initializing nonbonded utilities";
int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, sizeof(int), CU_MEMHOSTALLOC_PORTABLE));
numForceThreadBlocks = 4*multiprocessors;
forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
}
......@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete rebuildNeighborList;
if (blockSorter != NULL)
delete blockSorter;
if (pinnedCountBuffer != NULL)
cuMemFreeHost(pinnedCountBuffer);
cuEventDestroy(downloadCountEvent);
}
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
......@@ -141,6 +146,19 @@ void CudaNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter);
}
string CudaNonbondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void CudaNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) {
if (anyExclusions) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size());
......@@ -303,6 +321,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs.push_back(&parameters[i].getMemory());
for (int i = 0; i < (int) arguments.size(); i++)
forceArgs.push_back(&arguments[i].getMemory());
if (energyParameterDerivatives.size() > 0)
forceArgs.push_back(&context.getEnergyParamDerivBuffer().getDevicePointer());
if (useCutoff) {
findBlockBoundsArgs.push_back(&numAtoms);
findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer());
......@@ -375,17 +395,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if (lastCutoff != kernels.cutoffDistance)
forceRebuildNeighborList = true;
bool rebuild = false;
do {
context.executeKernel(kernels.findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
context.executeKernel(kernels.sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(kernels.findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
forceRebuildNeighborList = false;
if (context.getComputeForceCount() == 1)
rebuild = updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
} while(rebuild);
context.executeKernel(kernels.findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
context.executeKernel(kernels.sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(kernels.findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
forceRebuildNeighborList = false;
lastCutoff = kernels.cutoffDistance;
interactionCount->download(pinnedCountBuffer, false);
cuEventRecord(downloadCountEvent, context.getCurrentStream());
}
void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
......@@ -398,20 +415,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy);
context.executeKernel(kernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
if (useCutoff && numTiles > 0) {
cuEventSynchronize(downloadCountEvent);
updateNeighborListSize();
}
}
bool CudaNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return false;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) maxTiles)
if (pinnedCountBuffer[0] <= (unsigned int) maxTiles)
return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
maxTiles = (int) (1.2*pinnedInteractionCount[0]);
maxTiles = (int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (maxTiles > totalTiles)
maxTiles = totalTiles;
......@@ -428,6 +447,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
forceArgs[17] = &interactingAtoms->getDevicePointer();
findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer();
forceRebuildNeighborList = true;
context.setForcesValid(false);
return true;
}
......@@ -510,6 +530,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
args << "* __restrict__ ";
args << arguments[i].getName();
}
if (energyParameterDerivatives.size() > 0)
args << ", mixed* __restrict__ energyParamDerivs";
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1;
......@@ -618,6 +640,18 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
}
}
replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream initDerivs;
for (int i = 0; i < energyParameterDerivatives.size(); i++)
initDerivs<<"mixed energyParamDeriv"<<i<<" = 0;\n";
replacements["INIT_DERIVATIVES"] = initDerivs.str();
stringstream saveDerivs;
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
saveDerivs<<"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
replacements["SAVE_DERIVATIVES"] = saveDerivs.str();
stringstream shuffleWarpData;
if(useShuffle) {
......
......@@ -25,6 +25,10 @@
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR;
}
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
}
}
else {
#if HAS_LENNARD_JONES
......
......@@ -111,10 +111,12 @@ extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ g
const int* __restrict__ bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) {
mixed energy = 0;
INIT_PARAM_DERIVS
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) {
COMPUTE_FORCE
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
}
/**
......
......@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
mixed energy = 0;
INIT_PARAM_DERIVS
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions.
......@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+j;
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 0.5f;
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
......@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+tj;
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1;
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
......@@ -168,6 +171,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else
......@@ -191,39 +196,35 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
}
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
#else
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[threadIdx.x] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[threadIdx.x] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
else
skipTiles[threadIdx.x] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
}
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx;
......@@ -268,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj];
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
dEdR /= -r;
......@@ -311,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj];
real dEdR = 0;
real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
dEdR /= -r;
......@@ -355,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos++;
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
}
......@@ -5,6 +5,7 @@
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) {
mixed energy = 0;
INIT_PARAM_DERIVS
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the derivatives
......@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
}
......@@ -4,6 +4,7 @@
extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) {
INIT_PARAM_DERIVS
const real scale = RECIP((real) 0x100000000);
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 pos = posq[index];
......@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000);
}
SAVE_PARAM_DERIVS
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment