Commit a381a3ab authored by peastman's avatar peastman
Browse files

Merge branch 'master' into gayberne

parents 5ecc8e00 1f7866ad
...@@ -314,8 +314,9 @@ private: ...@@ -314,8 +314,9 @@ private:
CustomNonbondedForce* forceCopy; CustomNonbondedForce* forceCopy;
std::map<std::string, double> globalParamValues; std::map<std::string, double> globalParamValues;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::string> parameterNames, globalParameterNames; std::vector<std::string> parameterNames, globalParameterNames, energyParamDerivNames;
std::vector<std::pair<std::set<int>, std::set<int> > > interactionGroups; std::vector<std::pair<std::set<int>, std::set<int> > > interactionGroups;
std::vector<double> longRangeCoefficientDerivs;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuCustomNonbondedForce* nonbonded; CpuCustomNonbondedForce* nonbonded;
}; };
...@@ -398,7 +399,7 @@ private: ...@@ -398,7 +399,7 @@ private:
RealOpenMM nonbondedCutoff; RealOpenMM nonbondedCutoff;
CpuCustomGBForce* ixn; CpuCustomGBForce* ixn;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::string> particleParameterNames, globalParameterNames, valueNames; std::vector<std::string> particleParameterNames, globalParameterNames, energyParamDerivNames, valueNames;
std::vector<OpenMM::CustomGBForce::ComputationType> valueTypes; std::vector<OpenMM::CustomGBForce::ComputationType> valueTypes;
std::vector<OpenMM::CustomGBForce::ComputationType> energyTypes; std::vector<OpenMM::CustomGBForce::ComputationType> energyTypes;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2014 Stanford University and the Authors. * * Portions copyright (c) 2014-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -196,7 +196,7 @@ void CpuBondForce::calculateForce(vector<RealVec>& atomCoordinates, RealOpenMM** ...@@ -196,7 +196,7 @@ void CpuBondForce::calculateForce(vector<RealVec>& atomCoordinates, RealOpenMM**
for (int i = 0; i < extraBonds.size(); i++) { for (int i = 0; i < extraBonds.size(); i++) {
int bond = extraBonds[i]; int bond = extraBonds[i];
referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy); referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy, NULL);
} }
// Compute the total energy. // Compute the total energy.
...@@ -212,6 +212,6 @@ void CpuBondForce::threadComputeForce(ThreadPool& threads, int threadIndex, vect ...@@ -212,6 +212,6 @@ void CpuBondForce::threadComputeForce(ThreadPool& threads, int threadIndex, vect
int numBonds = bonds.size(); int numBonds = bonds.size();
for (int i = 0; i < numBonds; i++) { for (int i = 0; i < numBonds; i++) {
int bond = bonds[i]; int bond = bonds[i];
referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy); referenceBondIxn.calculateBondIxn(bondAtoms[bond], atomCoordinates, parameters[bond], forces, totalEnergy, NULL);
} }
} }
\ No newline at end of file
...@@ -47,13 +47,16 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa ...@@ -47,13 +47,16 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
const vector<Lepton::CompiledExpression>& valueExpressions, const vector<Lepton::CompiledExpression>& valueExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions, const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueGradientExpressions, const vector<vector<Lepton::CompiledExpression> >& valueGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueParamDerivExpressions,
const vector<string>& valueNames, const vector<string>& valueNames,
const vector<Lepton::CompiledExpression>& energyExpressions, const vector<Lepton::CompiledExpression>& energyExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyDerivExpressions, const vector<vector<Lepton::CompiledExpression> >& energyDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyGradientExpressions, const vector<vector<Lepton::CompiledExpression> >& energyGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyParamDerivExpressions,
const vector<string>& parameterNames) : const vector<string>& parameterNames) :
valueExpressions(valueExpressions), valueDerivExpressions(valueDerivExpressions), valueGradientExpressions(valueGradientExpressions), valueExpressions(valueExpressions), valueDerivExpressions(valueDerivExpressions), valueGradientExpressions(valueGradientExpressions),
energyExpressions(energyExpressions), energyDerivExpressions(energyDerivExpressions), energyGradientExpressions(energyGradientExpressions) { valueParamDerivExpressions(valueParamDerivExpressions), energyExpressions(energyExpressions), energyDerivExpressions(energyDerivExpressions),
energyGradientExpressions(energyGradientExpressions), energyParamDerivExpressions(energyParamDerivExpressions) {
firstAtom = (threadIndex*(long long) numAtoms)/numThreads; firstAtom = (threadIndex*(long long) numAtoms)/numThreads;
lastAtom = ((threadIndex+1)*(long long) numAtoms)/numThreads; lastAtom = ((threadIndex+1)*(long long) numAtoms)/numThreads;
for (int i = 0; i < (int) valueExpressions.size(); i++) for (int i = 0; i < (int) valueExpressions.size(); i++)
...@@ -64,6 +67,9 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa ...@@ -64,6 +67,9 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
for (int i = 0; i < (int) valueGradientExpressions.size(); i++) for (int i = 0; i < (int) valueGradientExpressions.size(); i++)
for (int j = 0; j < (int) valueGradientExpressions[i].size(); j++) for (int j = 0; j < (int) valueGradientExpressions[i].size(); j++)
expressionSet.registerExpression(this->valueGradientExpressions[i][j]); expressionSet.registerExpression(this->valueGradientExpressions[i][j]);
for (int i = 0; i < (int) valueParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) valueParamDerivExpressions[i].size(); j++)
expressionSet.registerExpression(this->valueParamDerivExpressions[i][j]);
for (int i = 0; i < (int) energyExpressions.size(); i++) for (int i = 0; i < (int) energyExpressions.size(); i++)
expressionSet.registerExpression(this->energyExpressions[i]); expressionSet.registerExpression(this->energyExpressions[i]);
for (int i = 0; i < (int) energyDerivExpressions.size(); i++) for (int i = 0; i < (int) energyDerivExpressions.size(); i++)
...@@ -72,6 +78,9 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa ...@@ -72,6 +78,9 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
for (int i = 0; i < (int) energyGradientExpressions.size(); i++) for (int i = 0; i < (int) energyGradientExpressions.size(); i++)
for (int j = 0; j < (int) energyGradientExpressions[i].size(); j++) for (int j = 0; j < (int) energyGradientExpressions[i].size(); j++)
expressionSet.registerExpression(this->energyGradientExpressions[i][j]); expressionSet.registerExpression(this->energyGradientExpressions[i][j]);
for (int i = 0; i < (int) energyParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) energyParamDerivExpressions[i].size(); j++)
expressionSet.registerExpression(this->energyParamDerivExpressions[i][j]);
xindex = expressionSet.getVariableIndex("x"); xindex = expressionSet.getVariableIndex("x");
yindex = expressionSet.getVariableIndex("y"); yindex = expressionSet.getVariableIndex("y");
zindex = expressionSet.getVariableIndex("z"); zindex = expressionSet.getVariableIndex("z");
...@@ -101,30 +110,37 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa ...@@ -101,30 +110,37 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
dVdZ.resize(valueDerivExpressions.size()); dVdZ.resize(valueDerivExpressions.size());
dVdR1.resize(valueDerivExpressions.size()); dVdR1.resize(valueDerivExpressions.size());
dVdR2.resize(valueDerivExpressions.size()); dVdR2.resize(valueDerivExpressions.size());
dValue0dParam.resize(valueParamDerivExpressions[0].size(), vector<float>(numAtoms));
energyParamDerivs.resize(valueParamDerivExpressions[0].size());
} }
CpuCustomGBForce::CpuCustomGBForce(int numAtoms, const std::vector<std::set<int> >& exclusions, CpuCustomGBForce::CpuCustomGBForce(int numAtoms, const std::vector<std::set<int> >& exclusions,
const vector<Lepton::CompiledExpression>& valueExpressions, const vector<Lepton::CompiledExpression>& valueExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions, const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueGradientExpressions, const vector<vector<Lepton::CompiledExpression> >& valueGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueParamDerivExpressions,
const vector<string>& valueNames, const vector<string>& valueNames,
const vector<CustomGBForce::ComputationType>& valueTypes, const vector<CustomGBForce::ComputationType>& valueTypes,
const vector<Lepton::CompiledExpression>& energyExpressions, const vector<Lepton::CompiledExpression>& energyExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyDerivExpressions, const vector<vector<Lepton::CompiledExpression> >& energyDerivExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyGradientExpressions, const vector<vector<Lepton::CompiledExpression> >& energyGradientExpressions,
const vector<vector<Lepton::CompiledExpression> >& energyParamDerivExpressions,
const vector<CustomGBForce::ComputationType>& energyTypes, const vector<CustomGBForce::ComputationType>& energyTypes,
const vector<string>& parameterNames, ThreadPool& threads) : const vector<string>& parameterNames, ThreadPool& threads) :
exclusions(exclusions), cutoff(false), periodic(false), valueNames(valueNames), valueTypes(valueTypes), exclusions(exclusions), cutoff(false), periodic(false), valueTypes(valueTypes), energyTypes(energyTypes), numValues(valueNames.size()),
energyTypes(energyTypes), paramNames(parameterNames), threads(threads) { numParams(parameterNames.size()), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++) for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(numAtoms, threads.getNumThreads(), i, valueExpressions, valueDerivExpressions, valueGradientExpressions, valueNames, threadData.push_back(new ThreadData(numAtoms, threads.getNumThreads(), i, valueExpressions, valueDerivExpressions, valueGradientExpressions,
energyExpressions, energyDerivExpressions, energyGradientExpressions, parameterNames)); valueParamDerivExpressions, valueNames, energyExpressions, energyDerivExpressions, energyGradientExpressions, energyParamDerivExpressions, parameterNames));
values.resize(valueNames.size()); values.resize(numValues);
dEdV.resize(valueNames.size()); dEdV.resize(numValues);
for (int i = 0; i < (int) values.size(); i++) { for (int i = 0; i < (int) values.size(); i++) {
values[i].resize(numAtoms); values[i].resize(numAtoms);
dEdV[i].resize(numAtoms); dEdV[i].resize(numAtoms);
} }
dValuedParam.resize(numValues);
for (int i = 0; i < numValues; i++)
dValuedParam[i].resize(valueParamDerivExpressions[0].size(), vector<float>(numAtoms));
} }
CpuCustomGBForce::~CpuCustomGBForce() { CpuCustomGBForce::~CpuCustomGBForce() {
...@@ -153,7 +169,7 @@ void CpuCustomGBForce::setPeriodic(RealVec& boxSize) { ...@@ -153,7 +169,7 @@ void CpuCustomGBForce::setPeriodic(RealVec& boxSize) {
void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM** atomParameters, void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM** atomParameters,
map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce, map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
bool includeForce, bool includeEnergy, double& totalEnergy) { bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
...@@ -174,6 +190,14 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM** ...@@ -174,6 +190,14 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM**
threads.execute(task); threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Sum derivatives of the first computed value with respect to global parameters.
bool hasParamDerivs = (threadData[0]->dValue0dParam.size() > 0);
if (hasParamDerivs) {
threads.resumeThreads();
threads.waitForThreads();
}
// Calculate the remaining computed values. // Calculate the remaining computed values.
threads.resumeThreads(); threads.resumeThreads();
...@@ -205,6 +229,10 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM** ...@@ -205,6 +229,10 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, RealOpenMM**
for (int i = 0; i < numThreads; i++) for (int i = 0; i < numThreads; i++)
totalEnergy += threadEnergy[i]; totalEnergy += threadEnergy[i];
} }
if (hasParamDerivs)
for (int i = 0; i < threads.getNumThreads(); i++)
for (int j = 0; j < threadData[i]->energyParamDerivs.size(); j++)
energyParamDerivs[j] += threadData[i]->energyParamDerivs[j];
} }
void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex) { void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
...@@ -224,12 +252,29 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex) ...@@ -224,12 +252,29 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
for (int i = 0; i < (int) data.value0.size(); i++) for (int i = 0; i < (int) data.value0.size(); i++)
data.value0[i] = 0.0f; data.value0[i] = 0.0f;
for (int i = 0; i < (int) data.dValue0dParam.size(); i++)
for (int j = 0; j < (int) data.dValue0dParam[i].size(); j++)
data.dValue0dParam[i][j] = 0.0;
if (valueTypes[0] == CustomGBForce::ParticlePair) if (valueTypes[0] == CustomGBForce::ParticlePair)
calculateParticlePairValue(0, data, numberOfAtoms, posq, atomParameters, true, boxSize, invBoxSize); calculateParticlePairValue(0, data, numberOfAtoms, posq, atomParameters, true, boxSize, invBoxSize);
else else
calculateParticlePairValue(0, data, numberOfAtoms, posq, atomParameters, false, boxSize, invBoxSize); calculateParticlePairValue(0, data, numberOfAtoms, posq, atomParameters, false, boxSize, invBoxSize);
threads.syncThreads(); threads.syncThreads();
// Sum derivatives of the first computed value with respect to global parameters.
bool hasParamDerivs = (data.dValue0dParam.size() > 0);
if (hasParamDerivs) {
for (int j = 0; j < data.dValue0dParam.size(); j++)
for (int k = data.firstAtom; k < data.lastAtom; k++) {
float sum = 0.0f;
for (int m = 0; m < threadData.size(); m++)
sum += threadData[m]->dValue0dParam[j][k];
dValuedParam[0][j][k] = sum;
}
threads.syncThreads();
}
// Sum the first computed value and calculate the remaining ones. // Sum the first computed value and calculate the remaining ones.
int numValues = valueTypes.size(); int numValues = valueTypes.size();
...@@ -241,11 +286,23 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex) ...@@ -241,11 +286,23 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
data.expressionSet.setVariable(data.xindex, posq[4*atom]); data.expressionSet.setVariable(data.xindex, posq[4*atom]);
data.expressionSet.setVariable(data.yindex, posq[4*atom+1]); data.expressionSet.setVariable(data.yindex, posq[4*atom+1]);
data.expressionSet.setVariable(data.zindex, posq[4*atom+2]); data.expressionSet.setVariable(data.zindex, posq[4*atom+2]);
for (int j = 0; j < (int) paramNames.size(); j++) for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[atom][j]); data.expressionSet.setVariable(data.paramIndex[j], atomParameters[atom][j]);
for (int i = 1; i < numValues; i++) { for (int i = 1; i < numValues; i++) {
data.expressionSet.setVariable(data.valueIndex[i-1], values[i-1][atom]); data.expressionSet.setVariable(data.valueIndex[i-1], values[i-1][atom]);
values[i][atom] = (float) data.valueExpressions[i].evaluate(); values[i][atom] = (float) data.valueExpressions[i].evaluate();
// Calculate derivatives with respect to parameters.
if (hasParamDerivs) {
for (int j = 0; j < data.valueParamDerivExpressions[i].size(); j++)
dValuedParam[i][j][atom] = data.valueParamDerivExpressions[i][j].evaluate();
for (int j = 0; j < i; j++) {
float dVdV = data.valueDerivExpressions[i][j].evaluate();
for (int k = 0; k < data.valueParamDerivExpressions[i].size(); k++)
dValuedParam[i][k][atom] += dVdV*dValuedParam[j][k][atom];
}
}
} }
} }
threads.syncThreads(); threads.syncThreads();
...@@ -254,7 +311,9 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex) ...@@ -254,7 +311,9 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
for (int i = 0; i < (int) data.dEdV.size(); i++) for (int i = 0; i < (int) data.dEdV.size(); i++)
for (int j = 0; j < (int) data.dEdV[i].size(); j++) for (int j = 0; j < (int) data.dEdV[i].size(); j++)
data.dEdV[i][j] = 0.0; data.dEdV[i][j] = 0.0f;
for (int i = 0; i < (int) data.energyParamDerivs.size(); i++)
data.energyParamDerivs[i] = 0.0f;
for (int termIndex = 0; termIndex < (int) data.energyExpressions.size(); termIndex++) { for (int termIndex = 0; termIndex < (int) data.energyExpressions.size(); termIndex++) {
if (energyTypes[termIndex] == CustomGBForce::SingleParticle) if (energyTypes[termIndex] == CustomGBForce::SingleParticle)
calculateSingleParticleEnergyTerm(termIndex, data, numberOfAtoms, posq, atomParameters, forces, energy); calculateSingleParticleEnergyTerm(termIndex, data, numberOfAtoms, posq, atomParameters, forces, energy);
...@@ -339,7 +398,7 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th ...@@ -339,7 +398,7 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
if (cutoff && r2 >= cutoffDistance2) if (cutoff && r2 >= cutoffDistance2)
return; return;
float r = sqrtf(r2); float r = sqrtf(r2);
for (int i = 0; i < (int) paramNames.size(); i++) { for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]); data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]); data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
} }
...@@ -349,6 +408,11 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th ...@@ -349,6 +408,11 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]); data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]);
} }
valueArray[atom1] += (float) data.valueExpressions[index].evaluate(); valueArray[atom1] += (float) data.valueExpressions[index].evaluate();
// Calculate derivatives with respect to parameters.
for (int i = 0; i < data.valueParamDerivExpressions[index].size(); i++)
data.dValue0dParam[i][atom1] += data.valueParamDerivExpressions[index][i].evaluate();
} }
void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq,
...@@ -357,17 +421,22 @@ void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& ...@@ -357,17 +421,22 @@ void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData&
data.expressionSet.setVariable(data.xindex, posq[4*i]); data.expressionSet.setVariable(data.xindex, posq[4*i]);
data.expressionSet.setVariable(data.yindex, posq[4*i+1]); data.expressionSet.setVariable(data.yindex, posq[4*i+1]);
data.expressionSet.setVariable(data.zindex, posq[4*i+2]); data.expressionSet.setVariable(data.zindex, posq[4*i+2]);
for (int j = 0; j < (int) paramNames.size(); j++) for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]); data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]);
for (int j = 0; j < (int) valueNames.size(); j++) for (int j = 0; j < (int) values.size(); j++)
data.expressionSet.setVariable(data.valueIndex[j], values[j][i]); data.expressionSet.setVariable(data.valueIndex[j], values[j][i]);
if (includeEnergy) if (includeEnergy)
totalEnergy += (float) data.energyExpressions[index].evaluate(); totalEnergy += (float) data.energyExpressions[index].evaluate();
for (int j = 0; j < (int) valueNames.size(); j++) for (int j = 0; j < (int) values.size(); j++)
data.dEdV[j][i] += (float) data.energyDerivExpressions[index][j].evaluate(); data.dEdV[j][i] += (float) data.energyDerivExpressions[index][j].evaluate();
forces[4*i+0] -= (float) data.energyGradientExpressions[index][0].evaluate(); forces[4*i+0] -= (float) data.energyGradientExpressions[index][0].evaluate();
forces[4*i+1] -= (float) data.energyGradientExpressions[index][1].evaluate(); forces[4*i+1] -= (float) data.energyGradientExpressions[index][1].evaluate();
forces[4*i+2] -= (float) data.energyGradientExpressions[index][2].evaluate(); forces[4*i+2] -= (float) data.energyGradientExpressions[index][2].evaluate();
// Compute derivatives with respect to parameters.
for (int k = 0; k < data.energyParamDerivExpressions[index].size(); k++)
data.energyParamDerivs[k] += data.energyParamDerivExpressions[index][k].evaluate();
} }
} }
...@@ -428,12 +497,12 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom ...@@ -428,12 +497,12 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom
// Record variables for evaluating expressions. // Record variables for evaluating expressions.
for (int i = 0; i < (int) paramNames.size(); i++) { for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]); data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]); data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
} }
data.expressionSet.setVariable(data.rindex, r); data.expressionSet.setVariable(data.rindex, r);
for (int i = 0; i < (int) valueNames.size(); i++) { for (int i = 0; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.particleValueIndex[i*2], values[i][atom1]); data.expressionSet.setVariable(data.particleValueIndex[i*2], values[i][atom1]);
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]); data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]);
} }
...@@ -447,10 +516,15 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom ...@@ -447,10 +516,15 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(forces+4*atom1)-result).store(forces+4*atom1); (fvec4(forces+4*atom1)-result).store(forces+4*atom1);
(fvec4(forces+4*atom2)+result).store(forces+4*atom2); (fvec4(forces+4*atom2)+result).store(forces+4*atom2);
for (int i = 0; i < (int) valueNames.size(); i++) { for (int i = 0; i < (int) values.size(); i++) {
data.dEdV[i][atom1] += (float) data.energyDerivExpressions[index][2*i+1].evaluate(); data.dEdV[i][atom1] += (float) data.energyDerivExpressions[index][2*i+1].evaluate();
data.dEdV[i][atom2] += (float) data.energyDerivExpressions[index][2*i+2].evaluate(); data.dEdV[i][atom2] += (float) data.energyDerivExpressions[index][2*i+2].evaluate();
} }
// Compute derivatives with respect to parameters.
for (int i = 0; i < data.energyParamDerivExpressions[index].size(); i++)
data.energyParamDerivs[i] += data.energyParamDerivExpressions[index][i].evaluate();
} }
void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, float* posq, RealOpenMM** atomParameters, void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, float* posq, RealOpenMM** atomParameters,
...@@ -500,9 +574,9 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, ...@@ -500,9 +574,9 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
data.expressionSet.setVariable(data.xindex, posq[4*i]); data.expressionSet.setVariable(data.xindex, posq[4*i]);
data.expressionSet.setVariable(data.yindex, posq[4*i+1]); data.expressionSet.setVariable(data.yindex, posq[4*i+1]);
data.expressionSet.setVariable(data.zindex, posq[4*i+2]); data.expressionSet.setVariable(data.zindex, posq[4*i+2]);
for (int j = 0; j < (int) paramNames.size(); j++) for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]); data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]);
for (int j = 1; j < (int) valueNames.size(); j++) { for (int j = 1; j < (int) values.size(); j++) {
data.expressionSet.setVariable(data.valueIndex[j-1], values[j-1][i]); data.expressionSet.setVariable(data.valueIndex[j-1], values[j-1][i]);
data.dVdX[j] = 0.0; data.dVdX[j] = 0.0;
data.dVdY[j] = 0.0; data.dVdY[j] = 0.0;
...@@ -521,6 +595,13 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, ...@@ -521,6 +595,13 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
forces[4*i+2] -= dEdV[j][i]*data.dVdZ[j]; forces[4*i+2] -= dEdV[j][i]*data.dVdZ[j];
} }
} }
// Compute chain rule terms for derivatives with respect to parameters.
for (int i = data.firstAtom; i < data.lastAtom; i++)
for (int j = 0; j < data.valueIndex.size(); j++)
for (int k = 0; k < dValuedParam[j].size(); k++)
data.energyParamDerivs[k] += dEdV[j][i]*dValuedParam[j][k][i];
} }
void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadData& data, float* posq, RealOpenMM** atomParameters, void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadData& data, float* posq, RealOpenMM** atomParameters,
...@@ -538,7 +619,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat ...@@ -538,7 +619,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
// Record variables for evaluating expressions. // Record variables for evaluating expressions.
for (int i = 0; i < (int) paramNames.size(); i++) { for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]); data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]); data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
data.expressionSet.setVariable(data.paramIndex[i], atomParameters[atom1][i]); data.expressionSet.setVariable(data.paramIndex[i], atomParameters[atom1][i]);
...@@ -562,7 +643,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat ...@@ -562,7 +643,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
f1 -= deltaR*(dEdV[0][atom1]*data.dVdR1[0]); f1 -= deltaR*(dEdV[0][atom1]*data.dVdR1[0]);
f2 -= deltaR*(dEdV[0][atom1]*data.dVdR2[0]); f2 -= deltaR*(dEdV[0][atom1]*data.dVdR2[0]);
} }
for (int i = 1; i < (int) valueNames.size(); i++) { for (int i = 1; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.valueIndex[i], values[i][atom1]); data.expressionSet.setVariable(data.valueIndex[i], values[i][atom1]);
data.dVdR1[i] = 0.0; data.dVdR1[i] = 0.0;
data.dVdR2[i] = 0.0; data.dVdR2[i] = 0.0;
......
...@@ -43,25 +43,30 @@ public: ...@@ -43,25 +43,30 @@ public:
CpuCustomNonbondedForce& owner; CpuCustomNonbondedForce& owner;
}; };
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames) : CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
energyExpression(energyExpression), forceExpression(forceExpression) { const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyR = ReferenceForce::getVariablePointer(this->energyExpression, "r"); energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
forceR = ReferenceForce::getVariablePointer(this->forceExpression, "r"); expressionSet.registerExpression(this->energyExpression);
expressionSet.registerExpression(this->forceExpression);
for (int i = 0; i < this->energyParamDerivExpressions.size(); i++)
expressionSet.registerExpression(this->energyParamDerivExpressions[i]);
rIndex = expressionSet.getVariableIndex("r");
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
for (int j = 1; j < 3; j++) { for (int j = 1; j < 3; j++) {
stringstream name; stringstream name;
name << parameterNames[i] << j; name << parameterNames[i] << j;
energyParticleParams.push_back(ReferenceForce::getVariablePointer(this->energyExpression, name.str())); particleParamIndex.push_back(expressionSet.getVariableIndex(name.str()));
forceParticleParams.push_back(ReferenceForce::getVariablePointer(this->forceExpression, name.str()));
} }
} }
energyParamDerivs.resize(energyParamDerivExpressions.size());
} }
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression, CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,ThreadPool& threads) : const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) :
cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) { cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++) for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames)); threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions));
} }
CpuCustomNonbondedForce::~CpuCustomNonbondedForce() { CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
...@@ -120,7 +125,7 @@ void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) { ...@@ -120,7 +125,7 @@ void CpuCustomNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates, RealOpenMM** atomParameters, void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<RealVec>& atomCoordinates, RealOpenMM** atomParameters,
RealOpenMM* fixedParameters, const map<string, double>& globalParameters, RealOpenMM* fixedParameters, const map<string, double>& globalParameters,
vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy) { vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
...@@ -144,11 +149,18 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v ...@@ -144,11 +149,18 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
// Combine the energies from all the threads. // Combine the energies from all the threads.
if (includeEnergy) {
int numThreads = threads.getNumThreads(); int numThreads = threads.getNumThreads();
if (includeEnergy) {
for (int i = 0; i < numThreads; i++) for (int i = 0; i < numThreads; i++)
totalEnergy += threadEnergy[i]; totalEnergy += threadEnergy[i];
} }
// Combine the energy derivatives from all threads.
int numDerivs = threadData[0]->energyParamDerivs.size();
for (int i = 0; i < numThreads; i++)
for (int j = 0; j < numDerivs; j++)
energyParamDerivs[j] += threadData[i]->energyParamDerivs[j];
} }
void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int threadIndex) { void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
...@@ -159,10 +171,10 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -159,10 +171,10 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
double& energy = threadEnergy[threadIndex]; double& energy = threadEnergy[threadIndex];
float* forces = &(*threadForce)[threadIndex][0]; float* forces = &(*threadForce)[threadIndex][0];
ThreadData& data = *threadData[threadIndex]; ThreadData& data = *threadData[threadIndex];
for (map<string, double>::const_iterator iter = globalParameters->begin(); iter != globalParameters->end(); ++iter) { for (map<string, double>::const_iterator iter = globalParameters->begin(); iter != globalParameters->end(); ++iter)
ReferenceForce::setVariable(ReferenceForce::getVariablePointer(data.energyExpression, iter->first), iter->second); data.expressionSet.setVariable(data.expressionSet.getVariableIndex(iter->first), iter->second);
ReferenceForce::setVariable(ReferenceForce::getVariablePointer(data.forceExpression, iter->first), iter->second); for (int i = 0; i < data.energyParamDerivs.size(); i++)
} data.energyParamDerivs[i] = 0.0;
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (groupInteractions.size() > 0) { if (groupInteractions.size() > 0) {
...@@ -175,10 +187,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -175,10 +187,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
int atom1 = groupInteractions[i].first; int atom1 = groupInteractions[i].first;
int atom2 = groupInteractions[i].second; int atom2 = groupInteractions[i].second;
for (int j = 0; j < (int) paramNames.size(); j++) { for (int j = 0; j < (int) paramNames.size(); j++) {
ReferenceForce::setVariable(data.energyParticleParams[j*2], atomParameters[atom1][j]); data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[atom1][j]);
ReferenceForce::setVariable(data.energyParticleParams[j*2+1], atomParameters[atom2][j]); data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[atom2][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2], atomParameters[atom1][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2+1], atomParameters[atom2][j]);
} }
calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize); calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize);
} }
...@@ -196,17 +206,13 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -196,17 +206,13 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex); const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i]; int first = neighbors[i];
for (int j = 0; j < (int) paramNames.size(); j++) { for (int j = 0; j < (int) paramNames.size(); j++)
ReferenceForce::setVariable(data.energyParticleParams[j*2], atomParameters[first][j]); data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[first][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2], atomParameters[first][j]);
}
for (int k = 0; k < blockSize; k++) { for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) { if ((exclusions[i] & (1<<k)) == 0) {
int second = blockAtom[k]; int second = blockAtom[k];
for (int j = 0; j < (int) paramNames.size(); j++) { for (int j = 0; j < (int) paramNames.size(); j++)
ReferenceForce::setVariable(data.energyParticleParams[j*2+1], atomParameters[second][j]); data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[second][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2+1], atomParameters[second][j]);
}
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize); calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize);
} }
} }
...@@ -223,10 +229,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -223,10 +229,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
for (int jj = ii+1; jj < numberOfAtoms; jj++) { for (int jj = ii+1; jj < numberOfAtoms; jj++) {
if (exclusions[jj].find(ii) == exclusions[jj].end()) { if (exclusions[jj].find(ii) == exclusions[jj].end()) {
for (int j = 0; j < (int) paramNames.size(); j++) { for (int j = 0; j < (int) paramNames.size(); j++) {
ReferenceForce::setVariable(data.energyParticleParams[j*2], atomParameters[ii][j]); data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[ii][j]);
ReferenceForce::setVariable(data.energyParticleParams[j*2+1], atomParameters[jj][j]); data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[jj][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2], atomParameters[ii][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2+1], atomParameters[jj][j]);
} }
calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize); calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize);
} }
...@@ -250,15 +254,15 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data, ...@@ -250,15 +254,15 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
// accumulate forces // accumulate forces
ReferenceForce::setVariable(data.energyR, r); data.expressionSet.setVariable(data.rIndex, r);
ReferenceForce::setVariable(data.forceR, r);
double dEdR = (includeForce ? data.forceExpression.evaluate()/r : 0.0); double dEdR = (includeForce ? data.forceExpression.evaluate()/r : 0.0);
double energy = (includeEnergy ? data.energyExpression.evaluate() : 0.0); double energy = (includeEnergy ? data.energyExpression.evaluate() : 0.0);
double switchValue = 1.0;
if (useSwitch) { if (useSwitch) {
if (r > switchingDistance) { if (r > switchingDistance) {
RealOpenMM t = (r-switchingDistance)/(cutoffDistance-switchingDistance); double t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
RealOpenMM switchValue = 1+t*t*t*(-10+t*(15-t*6)); switchValue = 1+t*t*t*(-10+t*(15-t*6));
RealOpenMM switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance); double switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
dEdR = switchValue*dEdR + energy*switchDeriv/r; dEdR = switchValue*dEdR + energy*switchDeriv/r;
energy *= switchValue; energy *= switchValue;
} }
...@@ -270,6 +274,11 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data, ...@@ -270,6 +274,11 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
// accumulate energies // accumulate energies
totalEnergy += energy; totalEnergy += energy;
// Accumulate energy derivatives.
for (int i = 0; i < data.energyParamDerivExpressions.size(); i++)
data.energyParamDerivs[i] += switchValue*data.energyParamDerivExpressions[i].evaluate();
} }
void CpuCustomNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuCustomNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, const fvec4& boxSize, const fvec4& invBoxSize) const {
......
...@@ -85,6 +85,11 @@ static ReferenceConstraints& extractConstraints(ContextImpl& context) { ...@@ -85,6 +85,11 @@ static ReferenceConstraints& extractConstraints(ContextImpl& context) {
return *(ReferenceConstraints*) data->constraints; return *(ReferenceConstraints*) data->constraints;
} }
static map<string, double>& extractEnergyParameterDerivatives(ContextImpl& context) {
ReferencePlatform::PlatformData* data = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
return *((map<string, double>*) data->energyParameterDerivatives);
}
/** /**
* Make sure an expression doesn't use any undefined variables. * Make sure an expression doesn't use any undefined variables.
*/ */
...@@ -814,6 +819,12 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C ...@@ -814,6 +819,12 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
globalParameterNames.push_back(force.getGlobalParameterName(i)); globalParameterNames.push_back(force.getGlobalParameterName(i));
globalParamValues[force.getGlobalParameterName(i)] = force.getGlobalParameterDefaultValue(i); globalParamValues[force.getGlobalParameterName(i)] = force.getGlobalParameterDefaultValue(i);
} }
std::vector<Lepton::CompiledExpression> energyParamDerivExpressions;
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string param = force.getEnergyParameterDerivativeName(i);
energyParamDerivNames.push_back(param);
energyParamDerivExpressions.push_back(expression.differentiate(param).createCompiledExpression());
}
set<string> variables; set<string> variables;
variables.insert("r"); variables.insert("r");
for (int i = 0; i < numParameters; i++) { for (int i = 0; i < numParameters; i++) {
...@@ -847,7 +858,7 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C ...@@ -847,7 +858,7 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
interactionGroups.push_back(make_pair(set1, set2)); interactionGroups.push_back(make_pair(set1, set2));
} }
data.isPeriodic = (nonbondedMethod == CutoffPeriodic); data.isPeriodic = (nonbondedMethod == CutoffPeriodic);
nonbonded = new CpuCustomNonbondedForce(energyExpression, forceExpression, parameterNames, exclusions, data.threads); nonbonded = new CpuCustomNonbondedForce(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions, data.threads);
if (interactionGroups.size() > 0) if (interactionGroups.size() > 0)
nonbonded->setInteractionGroups(interactionGroups); nonbonded->setInteractionGroups(interactionGroups);
} }
...@@ -875,15 +886,22 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc ...@@ -875,15 +886,22 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
} }
if (useSwitchingFunction) if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance); nonbonded->setUseSwitchingFunction(switchingDistance);
nonbonded->calculatePairIxn(numParticles, &data.posq[0], posData, particleParamArray, 0, globalParamValues, data.threadForce, includeForces, includeEnergy, energy); vector<double> energyParamDerivValues(energyParamDerivNames.size()+1, 0.0);
nonbonded->calculatePairIxn(numParticles, &data.posq[0], posData, particleParamArray, 0, globalParamValues, data.threadForce, includeForces, includeEnergy, energy, &energyParamDerivValues[0]);
map<string, double>& energyParamDerivs = extractEnergyParameterDerivatives(context);
for (int i = 0; i < energyParamDerivNames.size(); i++)
energyParamDerivs[energyParamDerivNames[i]] += energyParamDerivValues[i];
// Add in the long range correction. // Add in the long range correction.
if (!hasInitializedLongRangeCorrection || (globalParamsChanged && forceCopy != NULL)) { if (!hasInitializedLongRangeCorrection || (globalParamsChanged && forceCopy != NULL)) {
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner()); CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
} }
energy += longRangeCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]); double volume = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
energy += longRangeCoefficient/volume;
for (int i = 0; i < longRangeCoefficientDerivs.size(); i++)
energyParamDerivs[energyParamDerivNames[i]] += longRangeCoefficientDerivs[i]/volume;
return energy; return energy;
} }
...@@ -905,7 +923,7 @@ void CpuCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& con ...@@ -905,7 +923,7 @@ void CpuCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& con
// If necessary, recompute the long range correction. // If necessary, recompute the long range correction.
if (forceCopy != NULL) { if (forceCopy != NULL) {
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(force, context.getOwner()); CustomNonbondedForceImpl::calcLongRangeCorrection(force, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
*forceCopy = force; *forceCopy = force;
} }
...@@ -1027,6 +1045,7 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB ...@@ -1027,6 +1045,7 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
vector<vector<Lepton::CompiledExpression> > valueDerivExpressions(force.getNumComputedValues()); vector<vector<Lepton::CompiledExpression> > valueDerivExpressions(force.getNumComputedValues());
vector<vector<Lepton::CompiledExpression> > valueGradientExpressions(force.getNumComputedValues()); vector<vector<Lepton::CompiledExpression> > valueGradientExpressions(force.getNumComputedValues());
vector<vector<Lepton::CompiledExpression> > valueParamDerivExpressions(force.getNumComputedValues());
vector<Lepton::CompiledExpression> valueExpressions; vector<Lepton::CompiledExpression> valueExpressions;
vector<Lepton::CompiledExpression> energyExpressions; vector<Lepton::CompiledExpression> energyExpressions;
set<string> particleVariables, pairVariables; set<string> particleVariables, pairVariables;
...@@ -1061,6 +1080,11 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB ...@@ -1061,6 +1080,11 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
valueDerivExpressions[i].push_back(ex.differentiate(valueNames[j]).createCompiledExpression()); valueDerivExpressions[i].push_back(ex.differentiate(valueNames[j]).createCompiledExpression());
validateVariables(ex.getRootNode(), particleVariables); validateVariables(ex.getRootNode(), particleVariables);
} }
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++) {
string param = force.getEnergyParameterDerivativeName(j);
energyParamDerivNames.push_back(param);
valueParamDerivExpressions[i].push_back(ex.differentiate(param).createCompiledExpression());
}
particleVariables.insert(name); particleVariables.insert(name);
pairVariables.insert(name+"1"); pairVariables.insert(name+"1");
pairVariables.insert(name+"2"); pairVariables.insert(name+"2");
...@@ -1070,6 +1094,7 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB ...@@ -1070,6 +1094,7 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
vector<vector<Lepton::CompiledExpression> > energyDerivExpressions(force.getNumEnergyTerms()); vector<vector<Lepton::CompiledExpression> > energyDerivExpressions(force.getNumEnergyTerms());
vector<vector<Lepton::CompiledExpression> > energyGradientExpressions(force.getNumEnergyTerms()); vector<vector<Lepton::CompiledExpression> > energyGradientExpressions(force.getNumEnergyTerms());
vector<vector<Lepton::CompiledExpression> > energyParamDerivExpressions(force.getNumEnergyTerms());
for (int i = 0; i < force.getNumEnergyTerms(); i++) { for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression; string expression;
CustomGBForce::ComputationType type; CustomGBForce::ComputationType type;
...@@ -1093,14 +1118,17 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB ...@@ -1093,14 +1118,17 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
validateVariables(ex.getRootNode(), pairVariables); validateVariables(ex.getRootNode(), pairVariables);
} }
} }
for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).createCompiledExpression());
} }
// Delete the custom functions. // Delete the custom functions.
for (map<string, Lepton::CustomFunction*>::iterator iter = functions.begin(); iter != functions.end(); iter++) for (map<string, Lepton::CustomFunction*>::iterator iter = functions.begin(); iter != functions.end(); iter++)
delete iter->second; delete iter->second;
ixn = new CpuCustomGBForce(numParticles, exclusions, valueExpressions, valueDerivExpressions, valueGradientExpressions, valueNames, valueTypes, energyExpressions, ixn = new CpuCustomGBForce(numParticles, exclusions, valueExpressions, valueDerivExpressions, valueGradientExpressions, valueParamDerivExpressions,
energyDerivExpressions, energyGradientExpressions, energyTypes, particleParameterNames, data.threads); valueNames, valueTypes, energyExpressions, energyDerivExpressions, energyGradientExpressions, energyParamDerivExpressions, energyTypes,
particleParameterNames, data.threads);
data.isPeriodic = (force.getNonbondedMethod() == CustomGBForce::CutoffPeriodic); data.isPeriodic = (force.getNonbondedMethod() == CustomGBForce::CutoffPeriodic);
} }
...@@ -1117,7 +1145,11 @@ double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -1117,7 +1145,11 @@ double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFor
map<string, double> globalParameters; map<string, double> globalParameters;
for (int i = 0; i < (int) globalParameterNames.size(); i++) for (int i = 0; i < (int) globalParameterNames.size(); i++)
globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]); globalParameters[globalParameterNames[i]] = context.getParameter(globalParameterNames[i]);
ixn->calculateIxn(numParticles, &data.posq[0], particleParamArray, globalParameters, data.threadForce, includeForces, includeEnergy, energy); vector<double> energyParamDerivValues(energyParamDerivNames.size()+1, 0.0);
ixn->calculateIxn(numParticles, &data.posq[0], particleParamArray, globalParameters, data.threadForce, includeForces, includeEnergy, energy, &energyParamDerivValues[0]);
map<string, double>& energyParamDerivs = extractEnergyParameterDerivatives(context);
for (int i = 0; i < energyParamDerivNames.size(); i++)
energyParamDerivs[energyParamDerivNames[i]] += energyParamDerivValues[i];
return energy; return energy;
} }
......
...@@ -386,13 +386,15 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -386,13 +386,15 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float inverseR = 1/r; float inverseR = 1/r;
float chargeProdOverR = scaledChargeI*posq[4*j+3]*inverseR; float chargeProdOverR = scaledChargeI*posq[4*j+3]*inverseR;
float dEdR = chargeProdOverR*inverseR*inverseR; float dEdR = chargeProdOverR*inverseR*inverseR;
dEdR = dEdR * (erfAlphaR-(float)TWO_OVER_SQRT_PI*alphaR*(float)exp(-alphaR*alphaR)); dEdR = dEdR * (erfAlphaR-TWO_OVER_SQRT_PI*alphaR*(float)exp(-alphaR*alphaR));
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i); (fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j); (fvec4(forces+4*j)+result).store(forces+4*j);
if (includeEnergy) if (includeEnergy)
threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR; threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
} }
else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
} }
} }
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -100,6 +100,15 @@ public: ...@@ -100,6 +100,15 @@ public:
* refer to it by this name. * refer to it by this name.
*/ */
std::string addArgument(CUdeviceptr data, const std::string& type); std::string addArgument(CUdeviceptr data, const std::string& type);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/** /**
* Add some Cuda code that should be included in the program, before the start of the kernel. * Add some Cuda code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel. * This can be used, for example, to define functions that will be called by the kernel.
...@@ -129,6 +138,7 @@ private: ...@@ -129,6 +138,7 @@ private:
std::vector<std::string> argTypes; std::vector<std::string> argTypes;
std::vector<std::vector<CudaArray*> > atomIndices; std::vector<std::vector<CudaArray*> > atomIndices;
std::vector<std::string> prefixCode; std::vector<std::string> prefixCode;
std::vector<std::string> energyParameterDerivatives;
std::vector<void*> kernelArgs; std::vector<void*> kernelArgs;
int numForceBuffers, maxBonds, allGroups; int numForceBuffers, maxBonds, allGroups;
bool hasInitializedKernels, hasInteractions; bool hasInitializedKernels, hasInteractions;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -177,6 +177,12 @@ public: ...@@ -177,6 +177,12 @@ public:
CudaArray& getEnergyBuffer() { CudaArray& getEnergyBuffer() {
return *energyBuffer; return *energyBuffer;
} }
/**
* Get the array which contains the buffer in which derivatives of the energy with respect to parameters are computed.
*/
CudaArray& getEnergyParamDerivBuffer() {
return *energyParamDerivBuffer;
}
/** /**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device. * Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class. * This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
...@@ -326,6 +332,18 @@ public: ...@@ -326,6 +332,18 @@ public:
void setStepsSinceReorder(int steps) { void setStepsSinceReorder(int steps) {
stepsSinceReorder = steps; stepsSinceReorder = steps;
} }
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
bool getForcesValid() const {
return forcesValid;
}
/**
* Get the flag that marks whether the current force evaluation is valid.
*/
void setForcesValid(bool valid) {
forcesValid = valid;
}
/** /**
* Get the number of atoms. * Get the number of atoms.
*/ */
...@@ -532,6 +550,27 @@ public: ...@@ -532,6 +550,27 @@ public:
std::vector<ForcePostComputation*>& getPostComputations() { std::vector<ForcePostComputation*>& getPostComputations() {
return postComputations; return postComputations;
} }
/**
* Get the names of all parameters with respect to which energy derivatives are computed.
*/
const std::vector<std::string>& getEnergyParamDerivNames() const {
return energyParamDerivNames;
}
/**
* Get a workspace data structure used for accumulating the values of derivatives of the energy
* with respect to parameters.
*/
std::map<std::string, double>& getEnergyParamDerivWorkspace() {
return energyParamDerivWorkspace;
}
/**
* Register that the derivative of potential energy with respect to a context parameter
* will need to be calculated. If this is called multiple times for a single parameter,
* it is only added to the list once.
*
* @param param the name of the parameter to add
*/
void addEnergyParameterDerivative(const std::string& param);
/** /**
* Mark that the current molecule definitions (and hence the atom order) may be invalid. * Mark that the current molecule definitions (and hence the atom order) may be invalid.
* This should be called whenever force field parameters change. It will cause the definitions * This should be called whenever force field parameters change. It will cause the definitions
...@@ -572,7 +611,7 @@ private: ...@@ -572,7 +611,7 @@ private:
int paddedNumAtoms; int paddedNumAtoms;
int numAtomBlocks; int numAtomBlocks;
int numThreadBlocks; int numThreadBlocks;
bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel; bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, atomsWereReordered, boxIsTriclinic, hasCompilerKernel, forcesValid;
std::string compiler, tempDir, cacheDir, gpuArchitecture; std::string compiler, tempDir, cacheDir, gpuArchitecture;
float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat; float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize; double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
...@@ -597,7 +636,10 @@ private: ...@@ -597,7 +636,10 @@ private:
CudaArray* velm; CudaArray* velm;
CudaArray* force; CudaArray* force;
CudaArray* energyBuffer; CudaArray* energyBuffer;
CudaArray* energyParamDerivBuffer;
CudaArray* atomIndexDevice; CudaArray* atomIndexDevice;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex; std::vector<int> atomIndex;
std::vector<CUdeviceptr> autoclearBuffers; std::vector<CUdeviceptr> autoclearBuffers;
std::vector<int> autoclearBufferSizes; std::vector<int> autoclearBufferSizes;
......
...@@ -163,6 +163,12 @@ public: ...@@ -163,6 +163,12 @@ public:
* @param forces on exit, this contains the forces * @param forces on exit, this contains the forces
*/ */
void getForces(ContextImpl& context, std::vector<Vec3>& forces); void getForces(ContextImpl& context, std::vector<Vec3>& forces);
/**
* Get the current derivatives of the energy with respect to context parameters.
*
* @param derivs on exit, this contains the derivatives
*/
void getEnergyParameterDerivatives(ContextImpl& context, std::map<std::string, double>& derivs);
/** /**
* Get the current periodic box vectors. * Get the current periodic box vectors.
* *
...@@ -729,6 +735,7 @@ private: ...@@ -729,6 +735,7 @@ private:
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray*> tabulatedFunctions;
double longRangeCoefficient; double longRangeCoefficient;
std::vector<double> longRangeCoefficientDerivs;
bool hasInitializedLongRangeCorrection, hasInitializedKernel; bool hasInitializedLongRangeCorrection, hasInitializedKernel;
int numGroupThreadBlocks; int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy; CustomNonbondedForce* forceCopy;
...@@ -819,13 +826,15 @@ public: ...@@ -819,13 +826,15 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private: private:
double cutoff; double cutoff;
bool hasInitializedKernels, needParameterGradient; bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues; int maxTiles, numComputedValues;
CudaContext& cu; CudaContext& cu;
CudaParameterSet* params; CudaParameterSet* params;
CudaParameterSet* computedValues; CudaParameterSet* computedValues;
CudaParameterSet* energyDerivs; CudaParameterSet* energyDerivs;
CudaParameterSet* energyDerivChain; CudaParameterSet* energyDerivChain;
std::vector<CudaParameterSet*> dValuedParam;
std::vector<CudaArray*> dValue0dParam;
CudaArray* longEnergyDerivs; CudaArray* longEnergyDerivs;
CudaArray* globals; CudaArray* globals;
CudaArray* valueBuffers; CudaArray* valueBuffers;
...@@ -970,6 +979,7 @@ public: ...@@ -970,6 +979,7 @@ public:
private: private:
int numGroups, numBonds; int numGroups, numBonds;
bool needEnergyParamDerivs;
CudaContext& cu; CudaContext& cu;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
...@@ -1284,7 +1294,7 @@ public: ...@@ -1284,7 +1294,7 @@ public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER}; enum GlobalTargetType {DT, VARIABLE, PARAMETER};
CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu), CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
randomSeed(NULL), perDofValues(NULL) { randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
} }
~CudaIntegrateCustomStepKernel(); ~CudaIntegrateCustomStepKernel();
/** /**
...@@ -1349,8 +1359,11 @@ public: ...@@ -1349,8 +1359,11 @@ public:
private: private:
class ReorderListener; class ReorderListener;
class GlobalTarget; class GlobalTarget;
class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName); std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target); void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step); bool evaluateCondition(int step);
...@@ -1358,18 +1371,23 @@ private: ...@@ -1358,18 +1371,23 @@ private:
double energy; double energy;
float energyFloat; float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
CudaArray* globalValues; CudaArray* globalValues;
CudaArray* sumBuffer; CudaArray* sumBuffer;
CudaArray* summedValue; CudaArray* summedValue;
CudaArray* uniformRandoms; CudaArray* uniformRandoms;
CudaArray* randomSeed; CudaArray* randomSeed;
CudaArray* perDofEnergyParamDerivs;
std::map<int, CudaArray*> savedForces; std::map<int, CudaArray*> savedForces;
std::set<int> validSavedForces; std::set<int> validSavedForces;
CudaParameterSet* perDofValues; CudaParameterSet* perDofValues;
mutable std::vector<std::vector<float> > localPerDofValuesFloat; mutable std::vector<std::vector<float> > localPerDofValuesFloat;
mutable std::vector<std::vector<double> > localPerDofValuesDouble; mutable std::vector<std::vector<double> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<float> localPerDofEnergyParamDerivsFloat;
std::vector<double> localPerDofEnergyParamDerivsDouble;
std::vector<float> globalValuesFloat; std::vector<float> globalValuesFloat;
std::vector<double> globalValuesDouble; std::vector<double> globalValuesDouble;
std::vector<double> initialGlobalVariables; std::vector<double> initialGlobalVariables;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2013 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -88,6 +88,15 @@ public: ...@@ -88,6 +88,15 @@ public:
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel. * Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*/ */
void addArgument(const ParameterInfo& parameter); void addArgument(const ParameterInfo& parameter);
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
std::string addEnergyParameterDerivative(const std::string& param);
/** /**
* Specify the list of exclusions that an interaction outside the default kernel will depend on. * Specify the list of exclusions that an interaction outside the default kernel will depend on.
* *
...@@ -269,10 +278,13 @@ private: ...@@ -269,10 +278,13 @@ private:
CudaArray* oldPositions; CudaArray* oldPositions;
CudaArray* rebuildNeighborList; CudaArray* rebuildNeighborList;
CudaSort* blockSorter; CudaSort* blockSorter;
CUevent downloadCountEvent;
int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs; std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments; std::vector<ParameterInfo> arguments;
std::vector<std::string> energyParameterDerivatives;
std::map<int, double> groupCutoff; std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -52,12 +52,25 @@ void CudaBondedUtilities::addInteraction(const vector<vector<int> >& atoms, cons ...@@ -52,12 +52,25 @@ void CudaBondedUtilities::addInteraction(const vector<vector<int> >& atoms, cons
} }
} }
std::string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) { string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
arguments.push_back(data); arguments.push_back(data);
argTypes.push_back(type); argTypes.push_back(type);
return "customArg"+context.intToString(arguments.size()); return "customArg"+context.intToString(arguments.size());
} }
string CudaBondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void CudaBondedUtilities::addPrefixCode(const string& source) { void CudaBondedUtilities::addPrefixCode(const string& source) {
for (int i = 0; i < (int) prefixCode.size(); i++) for (int i = 0; i < (int) prefixCode.size(); i++)
if (prefixCode[i] == source) if (prefixCode[i] == source)
...@@ -109,11 +122,21 @@ void CudaBondedUtilities::initialize(const System& system) { ...@@ -109,11 +122,21 @@ void CudaBondedUtilities::initialize(const System& system) {
} }
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
s<<", "<<argTypes[i]<<"* customArg"<<(i+1); s<<", "<<argTypes[i]<<"* customArg"<<(i+1);
if (energyParameterDerivatives.size() > 0)
s<<", mixed* __restrict__ energyParamDerivs";
s<<") {\n"; s<<") {\n";
s<<"mixed energy = 0;\n"; s<<"mixed energy = 0;\n";
for (int i = 0; i < energyParameterDerivatives.size(); i++)
s<<"mixed energyParamDeriv"<<i<<" = 0;\n";
for (int force = 0; force < numForces; force++) for (int force = 0; force < numForces; force++)
s<<createForceSource(force, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]); s<<createForceSource(force, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
s<<"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;\n"; s<<"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;\n";
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
s<<"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
s<<"}\n"; s<<"}\n";
map<string, string> defines; map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
...@@ -171,6 +194,8 @@ void CudaBondedUtilities::computeInteractions(int groups) { ...@@ -171,6 +194,8 @@ void CudaBondedUtilities::computeInteractions(int groups) {
kernelArgs.push_back(&atomIndices[i][j]->getDevicePointer()); kernelArgs.push_back(&atomIndices[i][j]->getDevicePointer());
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
kernelArgs.push_back(&arguments[i]); kernelArgs.push_back(&arguments[i]);
if (energyParameterDerivatives.size() > 0)
kernelArgs.push_back(&context.getEnergyParamDerivBuffer().getDevicePointer());
} }
if (!hasInteractions) if (!hasInteractions)
return; return;
......
...@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false; ...@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0), const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) { pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\""; this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) { if (platformData.context != NULL) {
try { try {
...@@ -339,6 +339,8 @@ CudaContext::~CudaContext() { ...@@ -339,6 +339,8 @@ CudaContext::~CudaContext() {
delete force; delete force;
if (energyBuffer != NULL) if (energyBuffer != NULL)
delete energyBuffer; delete energyBuffer;
if (energyParamDerivBuffer != NULL)
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL) if (atomIndexDevice != NULL)
delete atomIndexDevice; delete atomIndexDevice;
if (integration != NULL) if (integration != NULL)
...@@ -390,6 +392,14 @@ void CudaContext::initialize() { ...@@ -390,6 +392,14 @@ void CudaContext::initialize() {
force = CudaArray::create<long long>(*this, paddedNumAtoms*3, "force"); force = CudaArray::create<long long>(*this, paddedNumAtoms*3, "force");
addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize()); addAutoclearBuffer(force->getDevicePointer(), force->getSize()*force->getElementSize());
addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize()); addAutoclearBuffer(energyBuffer->getDevicePointer(), energyBuffer->getSize()*energyBuffer->getElementSize());
int numEnergyParamDerivs = energyParamDerivNames.size();
if (numEnergyParamDerivs > 0) {
if (useDoublePrecision || useMixedPrecision)
energyParamDerivBuffer = CudaArray::create<double>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
else
energyParamDerivBuffer = CudaArray::create<float>(*this, numEnergyParamDerivs*numEnergyBuffers, "energyParamDerivBuffer");
addAutoclearBuffer(*energyParamDerivBuffer);
}
atomIndexDevice = CudaArray::create<int>(*this, paddedNumAtoms, "atomIndex"); atomIndexDevice = CudaArray::create<int>(*this, paddedNumAtoms, "atomIndex");
atomIndex.resize(paddedNumAtoms); atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i) for (int i = 0; i < paddedNumAtoms; ++i)
...@@ -1131,7 +1141,6 @@ void CudaContext::reorderAtoms() { ...@@ -1131,7 +1141,6 @@ void CudaContext::reorderAtoms() {
reorderAtomsImpl<float, float4, double, double4>(); reorderAtomsImpl<float, float4, double, double4>();
else else
reorderAtomsImpl<float, float4, float, float4>(); reorderAtomsImpl<float, float4, float, float4>();
nonbonded->updateNeighborListSize();
} }
template <class Real, class Real4, class Mixed, class Mixed4> template <class Real, class Real4, class Mixed, class Mixed4>
...@@ -1312,6 +1321,15 @@ void CudaContext::addPostComputation(ForcePostComputation* computation) { ...@@ -1312,6 +1321,15 @@ void CudaContext::addPostComputation(ForcePostComputation* computation) {
postComputations.push_back(computation); postComputations.push_back(computation);
} }
void CudaContext::addEnergyParameterDerivative(const string& param) {
// See if this parameter has already been registered.
for (int i = 0; i < energyParamDerivNames.size(); i++)
if (param == energyParamDerivNames[i])
return;
energyParamDerivNames.push_back(param);
}
struct CudaContext::WorkThread::ThreadData { struct CudaContext::WorkThread::ThreadData {
ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished, ThreadData(std::queue<CudaContext::WorkTask*>& tasks, bool& waiting, bool& finished,
pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) : pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
......
...@@ -174,8 +174,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -174,8 +174,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n"; out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n"; out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n"; out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n"; out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n"; out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n"; out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
out << "float4 c[4];\n"; out << "float4 c[4];\n";
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
...@@ -217,9 +217,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -217,9 +217,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n"; out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n"; out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n"; out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n"; out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n"; out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << ");\n"; out << "int u = min((int) floor(z), " << paramsInt[2] << "-1);\n";
out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n"; out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
out << "float4 c[16];\n"; out << "float4 c[16];\n";
for (int j = 0; j < 16; j++) for (int j = 0; j < 16; j++)
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. * * Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -65,12 +65,14 @@ private: ...@@ -65,12 +65,14 @@ private:
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true), CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL), interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) { oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), pinnedCountBuffer(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
// Decide how many thread blocks to use. // Decide how many thread blocks to use.
string errorMessage = "Error initializing nonbonded utilities"; string errorMessage = "Error initializing nonbonded utilities";
int multiprocessors; int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice())); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, sizeof(int), CU_MEMHOSTALLOC_PORTABLE));
numForceThreadBlocks = 4*multiprocessors; numForceThreadBlocks = 4*multiprocessors;
forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256); forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
} }
...@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() { ...@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete rebuildNeighborList; delete rebuildNeighborList;
if (blockSorter != NULL) if (blockSorter != NULL)
delete blockSorter; delete blockSorter;
if (pinnedCountBuffer != NULL)
cuMemFreeHost(pinnedCountBuffer);
cuEventDestroy(downloadCountEvent);
} }
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) { void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
...@@ -141,6 +146,19 @@ void CudaNonbondedUtilities::addArgument(const ParameterInfo& parameter) { ...@@ -141,6 +146,19 @@ void CudaNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter); arguments.push_back(parameter);
} }
string CudaNonbondedUtilities::addEnergyParameterDerivative(const string& param) {
// See if the parameter has already been added.
int index;
for (index = 0; index < energyParameterDerivatives.size(); index++)
if (param == energyParameterDerivatives[index])
break;
if (index == energyParameterDerivatives.size())
energyParameterDerivatives.push_back(param);
context.addEnergyParameterDerivative(param);
return string("energyParamDeriv")+context.intToString(index);
}
void CudaNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) { void CudaNonbondedUtilities::requestExclusions(const vector<vector<int> >& exclusionList) {
if (anyExclusions) { if (anyExclusions) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size()); bool sameExclusions = (exclusionList.size() == atomExclusions.size());
...@@ -303,6 +321,8 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -303,6 +321,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs.push_back(&parameters[i].getMemory()); forceArgs.push_back(&parameters[i].getMemory());
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
forceArgs.push_back(&arguments[i].getMemory()); forceArgs.push_back(&arguments[i].getMemory());
if (energyParameterDerivatives.size() > 0)
forceArgs.push_back(&context.getEnergyParamDerivBuffer().getDevicePointer());
if (useCutoff) { if (useCutoff) {
findBlockBoundsArgs.push_back(&numAtoms); findBlockBoundsArgs.push_back(&numAtoms);
findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer()); findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer());
...@@ -375,17 +395,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) { ...@@ -375,17 +395,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if (lastCutoff != kernels.cutoffDistance) if (lastCutoff != kernels.cutoffDistance)
forceRebuildNeighborList = true; forceRebuildNeighborList = true;
bool rebuild = false;
do {
context.executeKernel(kernels.findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms()); context.executeKernel(kernels.findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms());
blockSorter->sort(*sortedBlocks); blockSorter->sort(*sortedBlocks);
context.executeKernel(kernels.sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms()); context.executeKernel(kernels.sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(kernels.findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256); context.executeKernel(kernels.findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
forceRebuildNeighborList = false; forceRebuildNeighborList = false;
if (context.getComputeForceCount() == 1)
rebuild = updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
} while(rebuild);
lastCutoff = kernels.cutoffDistance; lastCutoff = kernels.cutoffDistance;
interactionCount->download(pinnedCountBuffer, false);
cuEventRecord(downloadCountEvent, context.getCurrentStream());
} }
void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) { void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
...@@ -398,20 +415,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo ...@@ -398,20 +415,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy); kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy);
context.executeKernel(kernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); context.executeKernel(kernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
} }
if (useCutoff && numTiles > 0) {
cuEventSynchronize(downloadCountEvent);
updateNeighborListSize();
}
} }
bool CudaNonbondedUtilities::updateNeighborListSize() { bool CudaNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer(); if (pinnedCountBuffer[0] <= (unsigned int) maxTiles)
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) maxTiles)
return false; return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent // The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future. // this from happening in the future.
maxTiles = (int) (1.2*pinnedInteractionCount[0]); maxTiles = (int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
...@@ -428,6 +447,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -428,6 +447,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
forceArgs[17] = &interactingAtoms->getDevicePointer(); forceArgs[17] = &interactingAtoms->getDevicePointer();
findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer(); findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer();
forceRebuildNeighborList = true; forceRebuildNeighborList = true;
context.setForcesValid(false);
return true; return true;
} }
...@@ -510,6 +530,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -510,6 +530,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
args << "* __restrict__ "; args << "* __restrict__ ";
args << arguments[i].getName(); args << arguments[i].getName();
} }
if (energyParameterDerivatives.size() > 0)
args << ", mixed* __restrict__ energyParamDerivs";
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1; stringstream load1;
...@@ -618,6 +640,18 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -618,6 +640,18 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
} }
} }
replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str(); replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream initDerivs;
for (int i = 0; i < energyParameterDerivatives.size(); i++)
initDerivs<<"mixed energyParamDeriv"<<i<<" = 0;\n";
replacements["INIT_DERIVATIVES"] = initDerivs.str();
stringstream saveDerivs;
const vector<string>& allParamDerivNames = context.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < energyParameterDerivatives.size(); i++)
for (int index = 0; index < numDerivs; index++)
if (allParamDerivNames[index] == energyParameterDerivatives[i])
saveDerivs<<"energyParamDerivs[(blockIdx.x*blockDim.x+threadIdx.x)*"<<numDerivs<<"+"<<index<<"] += energyParamDeriv"<<i<<";\n";
replacements["SAVE_DERIVATIVES"] = saveDerivs.str();
stringstream shuffleWarpData; stringstream shuffleWarpData;
if(useShuffle) { if(useShuffle) {
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR; tempEnergy += -prefactor*erfAlphaR;
} }
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
}
} }
else { else {
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
......
...@@ -111,10 +111,12 @@ extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ g ...@@ -111,10 +111,12 @@ extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ g
const int* __restrict__ bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ const int* __restrict__ bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) { EXTRA_ARGS) {
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) {
COMPUTE_FORCE COMPUTE_FORCE
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
/** /**
......
...@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -28,6 +28,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions. // First loop: process tiles that contain exclusions.
...@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -69,6 +70,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+j; atom2 = y*TILE_SIZE+j;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 0.5f;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -120,6 +122,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = y*TILE_SIZE+tj; atom2 = y*TILE_SIZE+tj;
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1); bool isExcluded = !(excl & 0x1);
#endif #endif
...@@ -168,6 +171,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -168,6 +171,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -191,16 +196,12 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -191,16 +196,12 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos]; x = tiles[pos];
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} #else
else
#endif
{
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos)); y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
...@@ -223,7 +224,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -223,7 +224,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
while (skipTiles[currentSkipIndex] < pos) while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++; currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos); includeTile = (skipTiles[currentSkipIndex] != pos);
} #endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
...@@ -268,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -268,6 +269,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
...@@ -311,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -311,6 +313,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
atom2 = atomIndices[tbx+tj]; atom2 = atomIndices[tbx+tj];
real dEdR = 0; real dEdR = 0;
real tempEnergy = 0; real tempEnergy = 0;
const real interactionScale = 1;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r; dEdR /= -r;
...@@ -355,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -355,4 +358,5 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
pos++; pos++;
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed energy = 0; mixed energy = 0;
INIT_PARAM_DERIVS
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the derivatives // Load the derivatives
...@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc ...@@ -17,4 +18,5 @@ extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forc
COMPUTE_ENERGY COMPUTE_ENERGY
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
SAVE_PARAM_DERIVS
} }
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ forceBuffers, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
INIT_PARAM_DERIVS
const real scale = RECIP((real) 0x100000000); const real scale = RECIP((real) 0x100000000);
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) { for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
real4 pos = posq[index]; real4 pos = posq[index];
...@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__ ...@@ -13,4 +14,5 @@ extern "C" __global__ void computeGradientChainRuleTerms(long long* __restrict__
forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS] = (long long) (force.y*0x100000000);
forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000); forceBuffers[index+PADDED_NUM_ATOMS*2] = (long long) (force.z*0x100000000);
} }
SAVE_PARAM_DERIVS
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment