Commit 3b6925ae authored by Andy Simmonett's avatar Andy Simmonett Committed by GitHub
Browse files

Merge pull request #1 from peastman/ljpme

Cleanup to LJ PME code
parents 5a8a8aa9 f7a102fb
......@@ -129,6 +129,7 @@ private:
bool useSwitch;
bool periodic;
bool triclinic;
bool useInteractionGroups;
const CpuNeighborList* neighborList;
float recipBoxSize[3];
RealVec periodicBoxVectors[3];
......@@ -183,8 +184,8 @@ public:
Lepton::CompiledExpression forceExpression;
std::vector<Lepton::CompiledExpression> energyParamDerivExpressions;
CompiledExpressionSet expressionSet;
std::vector<int> particleParamIndex;
int rIndex;
std::vector<double> particleParam;
double r;
std::vector<RealOpenMM> energyParamDerivs;
};
......
......@@ -258,20 +258,30 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class PmeIO;
CpuPlatform::PlatformData& data;
int numParticles, num14;
int **bonded14IndexArray;
double **bonded14ParamArray;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldSelfEnergy, dispersionCoefficient;
int kmax[3], gridSize[3];
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
int kmax[3], gridSize[3], dispersionGridSize[3];
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme, hasInitializedDispersionPme;
std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams;
std::vector<float> C6params;
NonbondedMethod nonbondedMethod;
CpuNonbondedForce* nonbonded;
Kernel optimizedPme;
Kernel optimizedPme, optimizedDispersionPme;
CpuBondForce bondForce;
};
......
/* Portions copyright (c) 2013-2015 Stanford University and Simbios.
/* Portions copyright (c) 2013-2016 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
......@@ -43,12 +43,12 @@ public:
*
* @param numberOfAtoms number of atoms
* @param deltaT delta t for dynamics
* @param tau viscosity
* @param friction friction coefficient
* @param temperature temperature
* @param threads thread pool for parallelizing computation
* @param random random number generator
*/
CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, OpenMM::ThreadPool& threads, OpenMM::CpuRandom& random);
CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM friction, RealOpenMM temperature, OpenMM::ThreadPool& threads, OpenMM::CpuRandom& random);
/**
* Destructor.
......
......@@ -114,6 +114,17 @@ class CpuNonbondedForce {
void setUsePME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void setUseLJPME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Calculate Ewald ixn
......@@ -122,6 +133,7 @@ class CpuNonbondedForce {
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param C6Paramrs C6 parameters for multiplicative representation of dispersion
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
......@@ -130,8 +142,8 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
std::vector<RealVec>& forces, double* totalEnergy) const;
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<float> &C6params,
const std::vector<std::set<int> >& exclusions, std::vector<RealVec>& forces, double* totalEnergy) const;
/**---------------------------------------------------------------------------------------
......@@ -150,7 +162,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
const std::vector<float>& C6params, const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/**
* This routine contains the code executed by each thread.
......@@ -163,28 +175,32 @@ protected:
bool periodic;
bool triclinic;
bool ewald;
bool pme;
bool tableIsValid;
bool ljpme, pme;
bool tableIsValid, expTableIsValid;
const CpuNeighborList* neighborList;
float recipBoxSize[3];
RealVec periodicBoxVectors[3];
AlignedArray<fvec4> periodicBoxVec4;
float cutoffDistance, switchingDistance;
float krf, crf;
float alphaEwald;
float alphaEwald, alphaDispersionEwald;
int numRx, numRy, numRz;
int meshDim[3];
int meshDim[3], dispersionMeshDim[3];
std::vector<float> erfcTable, ewaldScaleTable;
float ewaldDX, ewaldDXInv, erfcDXInv;
std::vector<float> exptermsTable, dExptermsTable;
float ewaldDX, ewaldDXInv, erfcDXInv, exptermsDX, exptermsDXInv;
std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads.
int numberOfAtoms;
float* posq;
RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters;
float const *C6params;
std::set<int> const* exclusions;
std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy;
float inverseRcut6;
float inverseRcut6Expterm;
void* atomicCounter;
static const float TWO_OVER_SQRT_PI;
......@@ -238,10 +254,29 @@ protected:
*/
void tabulateEwaldScaleFactor();
/**
* Create a lookup table for the scale factor used with dispersion PME.
*/
void tabulateExpTerms();
/**
* Compute a fast approximation to erfc(x).
*/
float erfcApprox(float x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
float exptermsApprox(float R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
float dExptermsApprox(float R);
};
} // namespace OpenMM
......
......@@ -93,6 +93,20 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec4 ewaldScaleFunction(const fvec4& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec4 exptermsApprox(const fvec4& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec4 dExptermsApprox(const fvec4& R);
};
} // namespace OpenMM
......
......@@ -92,6 +92,21 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec8 ewaldScaleFunction(const fvec8& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec8 exptermsApprox(const fvec8& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec8 dExptermsApprox(const fvec8& R);
};
} // namespace OpenMM
......
......@@ -59,47 +59,68 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
energyGradientExpressions(energyGradientExpressions), energyParamDerivExpressions(energyParamDerivExpressions) {
firstAtom = (threadIndex*(long long) numAtoms)/numThreads;
lastAtom = ((threadIndex+1)*(long long) numAtoms)/numThreads;
for (int i = 0; i < (int) valueExpressions.size(); i++)
map<string, double*> variableLocations;
variableLocations["x"] = &x;
variableLocations["y"] = &y;
variableLocations["z"] = &z;
variableLocations["r"] = &r;
param.resize(parameterNames.size());
particleParam.resize(parameterNames.size()*2);
for (int i = 0; i < (int) parameterNames.size(); i++) {
variableLocations[parameterNames[i]] = &param[i];
for (int j = 0; j < 2; j++) {
stringstream name;
name << parameterNames[i] << (j+1);
variableLocations[name.str()] = &particleParam[2*i+j];
}
}
value.resize(valueNames.size());
particleValue.resize(valueNames.size()*2);
for (int i = 0; i < (int) valueNames.size(); i++) {
variableLocations[valueNames[i]] = &value[i];
for (int j = 0; j < 2; j++) {
stringstream name;
name << valueNames[i] << (j+1);
variableLocations[name.str()] = &particleValue[2*i+j];
}
}
for (int i = 0; i < (int) valueExpressions.size(); i++) {
this->valueExpressions[i].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueExpressions[i]);
}
for (int i = 0; i < (int) valueDerivExpressions.size(); i++)
for (int j = 0; j < (int) valueDerivExpressions[i].size(); j++)
for (int j = 0; j < (int) valueDerivExpressions[i].size(); j++) {
this->valueDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueDerivExpressions[i][j]);
}
for (int i = 0; i < (int) valueGradientExpressions.size(); i++)
for (int j = 0; j < (int) valueGradientExpressions[i].size(); j++)
for (int j = 0; j < (int) valueGradientExpressions[i].size(); j++) {
this->valueGradientExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueGradientExpressions[i][j]);
}
for (int i = 0; i < (int) valueParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) valueParamDerivExpressions[i].size(); j++)
for (int j = 0; j < (int) valueParamDerivExpressions[i].size(); j++) {
this->valueParamDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueParamDerivExpressions[i][j]);
for (int i = 0; i < (int) energyExpressions.size(); i++)
}
for (int i = 0; i < (int) energyExpressions.size(); i++) {
this->energyExpressions[i].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyExpressions[i]);
}
for (int i = 0; i < (int) energyDerivExpressions.size(); i++)
for (int j = 0; j < (int) energyDerivExpressions[i].size(); j++)
for (int j = 0; j < (int) energyDerivExpressions[i].size(); j++) {
this->energyDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyDerivExpressions[i][j]);
}
for (int i = 0; i < (int) energyGradientExpressions.size(); i++)
for (int j = 0; j < (int) energyGradientExpressions[i].size(); j++)
for (int j = 0; j < (int) energyGradientExpressions[i].size(); j++) {
this->energyGradientExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyGradientExpressions[i][j]);
}
for (int i = 0; i < (int) energyParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) energyParamDerivExpressions[i].size(); j++)
for (int j = 0; j < (int) energyParamDerivExpressions[i].size(); j++) {
this->energyParamDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyParamDerivExpressions[i][j]);
xindex = expressionSet.getVariableIndex("x");
yindex = expressionSet.getVariableIndex("y");
zindex = expressionSet.getVariableIndex("z");
rindex = expressionSet.getVariableIndex("r");
for (int i = 0; i < (int) parameterNames.size(); i++) {
paramIndex.push_back(expressionSet.getVariableIndex(parameterNames[i]));
for (int j = 1; j < 3; j++) {
stringstream name;
name << parameterNames[i] << j;
particleParamIndex.push_back(expressionSet.getVariableIndex(name.str()));
}
}
for (int i = 0; i < (int) valueNames.size(); i++) {
valueIndex.push_back(expressionSet.getVariableIndex(valueNames[i]));
for (int j = 1; j < 3; j++) {
stringstream name;
name << valueNames[i] << j;
particleValueIndex.push_back(expressionSet.getVariableIndex(name.str()));
}
}
value0.resize(numAtoms);
dEdV.resize(valueNames.size());
......@@ -283,13 +304,13 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
for (int j = 0; j < (int) threadData.size(); j++)
sum += threadData[j]->value0[atom];
values[0][atom] = sum;
data.expressionSet.setVariable(data.xindex, posq[4*atom]);
data.expressionSet.setVariable(data.yindex, posq[4*atom+1]);
data.expressionSet.setVariable(data.zindex, posq[4*atom+2]);
data.x = posq[4*atom];
data.y = posq[4*atom+1];
data.z = posq[4*atom+2];
for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[atom][j]);
data.param[j] = atomParameters[atom][j];
for (int i = 1; i < numValues; i++) {
data.expressionSet.setVariable(data.valueIndex[i-1], values[i-1][atom]);
data.value[i-1] = values[i-1][atom];
values[i][atom] = (float) data.valueExpressions[i].evaluate();
// Calculate derivatives with respect to parameters.
......@@ -397,15 +418,14 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
getDeltaR(pos2, pos1, deltaR, r2, periodic, boxSize, invBoxSize);
if (cutoff && r2 >= cutoffDistance2)
return;
float r = sqrtf(r2);
data.r = sqrtf(r2);
for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
data.particleParam[i*2] = atomParameters[atom1][i];
data.particleParam[i*2+1] = atomParameters[atom2][i];
}
data.expressionSet.setVariable(data.rindex, r);
for (int i = 0; i < index; i++) {
data.expressionSet.setVariable(data.particleValueIndex[i*2], values[i][atom1]);
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]);
data.particleValue[i*2] = values[i][atom1];
data.particleValue[i*2+1] = values[i][atom2];
}
valueArray[atom1] += (float) data.valueExpressions[index].evaluate();
......@@ -418,13 +438,13 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq,
RealOpenMM** atomParameters, float* forces, double& totalEnergy) {
for (int i = data.firstAtom; i < data.lastAtom; i++) {
data.expressionSet.setVariable(data.xindex, posq[4*i]);
data.expressionSet.setVariable(data.yindex, posq[4*i+1]);
data.expressionSet.setVariable(data.zindex, posq[4*i+2]);
data.x = posq[4*i];
data.y = posq[4*i+1];
data.z = posq[4*i+2];
for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]);
data.param[j] = atomParameters[i][j];
for (int j = 0; j < (int) values.size(); j++)
data.expressionSet.setVariable(data.valueIndex[j], values[j][i]);
data.value[j] = values[j][i];
if (includeEnergy)
totalEnergy += (float) data.energyExpressions[index].evaluate();
for (int j = 0; j < (int) values.size(); j++)
......@@ -494,17 +514,17 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom
if (cutoff && r2 >= cutoffDistance2)
return;
float r = sqrtf(r2);
data.r = r;
// Record variables for evaluating expressions.
for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
data.particleParam[i*2] = atomParameters[atom1][i];
data.particleParam[i*2+1] = atomParameters[atom2][i];
}
data.expressionSet.setVariable(data.rindex, r);
for (int i = 0; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.particleValueIndex[i*2], values[i][atom1]);
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]);
data.particleValue[i*2] = values[i][atom1];
data.particleValue[i*2+1] = values[i][atom2];
}
// Evaluate the energy and its derivatives.
......@@ -571,13 +591,13 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
// Compute chain rule terms for computed values that depend explicitly on particle coordinates.
for (int i = data.firstAtom; i < data.lastAtom; i++) {
data.expressionSet.setVariable(data.xindex, posq[4*i]);
data.expressionSet.setVariable(data.yindex, posq[4*i+1]);
data.expressionSet.setVariable(data.zindex, posq[4*i+2]);
data.x = posq[4*i];
data.y = posq[4*i+1];
data.z = posq[4*i+2];
for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]);
data.param[j] = atomParameters[i][j];
for (int j = 1; j < (int) values.size(); j++) {
data.expressionSet.setVariable(data.valueIndex[j-1], values[j-1][i]);
data.value[j-1] = values[j-1][i];
data.dVdX[j] = 0.0;
data.dVdY[j] = 0.0;
data.dVdZ[j] = 0.0;
......@@ -599,7 +619,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
// Compute chain rule terms for derivatives with respect to parameters.
for (int i = data.firstAtom; i < data.lastAtom; i++)
for (int j = 0; j < data.valueIndex.size(); j++)
for (int j = 0; j < data.value.size(); j++)
for (int k = 0; k < dValuedParam[j].size(); k++)
data.energyParamDerivs[k] += dEdV[j][i]*dValuedParam[j][k][i];
}
......@@ -616,21 +636,21 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
if (cutoff && r2 >= cutoffDistance2)
return;
float r = sqrtf(r2);
data.r = r;
// Record variables for evaluating expressions.
for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]);
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]);
data.expressionSet.setVariable(data.paramIndex[i], atomParameters[atom1][i]);
}
data.expressionSet.setVariable(data.valueIndex[0], values[0][atom1]);
data.expressionSet.setVariable(data.xindex, posq[4*atom1]);
data.expressionSet.setVariable(data.yindex, posq[4*atom1+1]);
data.expressionSet.setVariable(data.zindex, posq[4*atom1+2]);
data.expressionSet.setVariable(data.rindex, r);
data.expressionSet.setVariable(data.particleValueIndex[0], values[0][atom1]);
data.expressionSet.setVariable(data.particleValueIndex[1], values[0][atom2]);
data.particleParam[i*2] = atomParameters[atom1][i];
data.particleParam[i*2+1] = atomParameters[atom2][i];
data.param[i] = atomParameters[atom1][i];
}
data.value[0] = values[0][atom1];
data.x = posq[4*atom1];
data.y = posq[4*atom1+1];
data.z = posq[4*atom1+2];
data.particleValue[0] = values[0][atom1];
data.particleValue[1] = values[0][atom2];
// Evaluate the derivative of each parameter with respect to position and apply forces.
......@@ -644,7 +664,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
f2 -= deltaR*(dEdV[0][atom1]*data.dVdR2[0]);
}
for (int i = 1; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.valueIndex[i], values[i][atom1]);
data.value[i] = values[i][atom1];
data.dVdR1[i] = 0.0;
data.dVdR2[i] = 0.0;
for (int j = 0; j < i; j++) {
......
......@@ -46,25 +46,31 @@ public:
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
expressionSet.registerExpression(this->energyExpression);
expressionSet.registerExpression(this->forceExpression);
for (int i = 0; i < this->energyParamDerivExpressions.size(); i++)
expressionSet.registerExpression(this->energyParamDerivExpressions[i]);
rIndex = expressionSet.getVariableIndex("r");
map<string, double*> variableLocations;
variableLocations["r"] = &r;
particleParam.resize(2*parameterNames.size());
for (int i = 0; i < (int) parameterNames.size(); i++) {
for (int j = 1; j < 3; j++) {
for (int j = 0; j < 2; j++) {
stringstream name;
name << parameterNames[i] << j;
particleParamIndex.push_back(expressionSet.getVariableIndex(name.str()));
name << parameterNames[i] << (j+1);
variableLocations[name.str()] = &particleParam[i*2+j];
}
}
energyParamDerivs.resize(energyParamDerivExpressions.size());
this->energyExpression.setVariableLocations(variableLocations);
this->forceExpression.setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyExpression);
expressionSet.registerExpression(this->forceExpression);
for (int i = 0; i < this->energyParamDerivExpressions.size(); i++) {
this->energyParamDerivExpressions[i].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyParamDerivExpressions[i]);
}
}
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) :
cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions));
}
......@@ -81,6 +87,7 @@ void CpuCustomNonbondedForce::setUseCutoff(RealOpenMM distance, const CpuNeighbo
}
void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) {
useInteractionGroups = true;
for (int group = 0; group < (int) groups.size(); group++) {
const set<int>& set1 = groups[group].first;
const set<int>& set2 = groups[group].second;
......@@ -177,7 +184,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
data.energyParamDerivs[i] = 0.0;
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (groupInteractions.size() > 0) {
if (useInteractionGroups) {
// The user has specified interaction groups, so compute only the requested interactions.
while (true) {
......@@ -187,8 +194,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
int atom1 = groupInteractions[i].first;
int atom2 = groupInteractions[i].second;
for (int j = 0; j < (int) paramNames.size(); j++) {
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[atom1][j]);
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[atom2][j]);
data.particleParam[j*2] = atomParameters[atom1][j];
data.particleParam[j*2+1] = atomParameters[atom2][j];
}
calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize);
}
......@@ -207,12 +214,12 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int j = 0; j < (int) paramNames.size(); j++)
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[first][j]);
data.particleParam[j*2] = atomParameters[first][j];
for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) {
int second = blockAtom[k];
for (int j = 0; j < (int) paramNames.size(); j++)
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[second][j]);
data.particleParam[j*2+1] = atomParameters[second][j];
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize);
}
}
......@@ -229,8 +236,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
for (int jj = ii+1; jj < numberOfAtoms; jj++) {
if (exclusions[jj].find(ii) == exclusions[jj].end()) {
for (int j = 0; j < (int) paramNames.size(); j++) {
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[ii][j]);
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[jj][j]);
data.particleParam[j*2] = atomParameters[ii][j];
data.particleParam[j*2+1] = atomParameters[jj][j];
}
calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize);
}
......@@ -251,10 +258,10 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
if (cutoff && r2 >= cutoffDistance*cutoffDistance)
return;
float r = sqrtf(r2);
data.r = r;
// accumulate forces
data.expressionSet.setVariable(data.rIndex, r);
double dEdR = (includeForce ? data.forceExpression.evaluate()/r : 0.0);
double energy = (includeEnergy ? data.energyExpression.evaluate() : 0.0);
double switchValue = 1.0;
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
/* Portions copyright (c) 2006-2016 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -89,6 +88,10 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f
particleParams = params;
bornRadii.resize(params.size()+3);
obcChain.resize(params.size()+3);
for (int i = bornRadii.size()-3; i < bornRadii.size(); i++) {
bornRadii[i] = 0;
obcChain[i] = 0;
}
}
void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
......
......@@ -50,6 +50,7 @@
#include "lepton/CustomFunction.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h"
using namespace OpenMM;
......@@ -528,7 +529,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) {
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8();
else
......@@ -575,12 +576,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge;
}
......@@ -616,19 +619,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
}
else if (nonbondedMethod == PME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = (RealOpenMM) alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = (RealOpenMM) alpha;
useSwitchingFunction = false;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
else
if(nonbondedMethod == LJPME){
for (int atom = 0; atom < numParticles; atom++) {
// Dispersion self term
ewaldSelfEnergy += pow(ewaldDispersionAlpha, 6.0) * C6params[atom]*C6params[atom] / 12.0;
}
}
} else {
ewaldSelfEnergy = 0.0;
}
rfDielectric = force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME);
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
}
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
......@@ -646,6 +665,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
}
}
if (nonbondedMethod == LJPME) {
// If available, use the optimized PME implementation.
vector<string> kernelNames;
kernelNames.push_back("CalcPmeReciprocalForce");
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
}
}
}
AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context);
......@@ -654,6 +687,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) {
......@@ -669,9 +703,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded->setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance);
if (ljpme){
nonbonded->setUsePME(ewaldAlpha, gridSize);
nonbonded->setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
double nonbondedEnergy = 0;
if (includeDirect)
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) {
if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
......@@ -680,13 +718,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
}
else
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
}
energy += nonbondedEnergy;
if (includeDirect) {
ReferenceLJCoulomb14 nonbonded14;
bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic)
if (data.isPeriodic && nonbondedMethod != LJPME)
energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
}
return energy;
......@@ -739,7 +777,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
}
void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
......@@ -751,6 +789,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
}
void CpuCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedDispersionPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = ewaldDispersionAlpha;
nx = dispersionGridSize[0];
ny = dispersionGridSize[1];
nz = dispersionGridSize[2];
}
}
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) {
}
......@@ -1285,8 +1336,7 @@ void CpuIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langevi
if (dynamics)
delete dynamics;
RealOpenMM tau = (friction == 0.0 ? 0.0 : 1.0/friction);
dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, tau, temperature, data.threads, data.random);
dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, friction, temperature, data.threads, data.random);
dynamics->setReferenceConstraintAlgorithm(&extractConstraints(context));
prevTemp = temperature;
prevFriction = friction;
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
/* Portions copyright (c) 2006-2016 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
......@@ -59,8 +59,8 @@ public:
CpuLangevinDynamics& owner;
};
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, tau, temperature), threads(threads), random(random) {
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM friction, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
}
CpuLangevinDynamics::~CpuLangevinDynamics() {
......@@ -120,11 +120,12 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
}
void CpuLangevinDynamics::threadUpdate1(int threadIndex) {
const RealOpenMM tau = getTau();
const RealOpenMM vscale = EXP(-getDeltaT()/tau);
const RealOpenMM fscale = (1-vscale)*tau;
RealOpenMM dt = getDeltaT();
RealOpenMM friction = getFriction();
const RealOpenMM vscale = EXP(-dt*friction);
const RealOpenMM fscale = (friction == 0 ? dt : (1-vscale)/friction);
const RealOpenMM kT = BOLTZ*getTemperature();
const RealOpenMM noisescale = SQRT(2*kT/tau)*SQRT(0.5*(1-vscale*vscale)*tau);
const RealOpenMM noisescale = SQRT(kT*(1-vscale*vscale));
int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
......
......@@ -30,6 +30,7 @@
#include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h"
#include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
......@@ -57,7 +58,8 @@ public:
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) {
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
}
CpuNonbondedForce::~CpuNonbondedForce() {
......@@ -78,11 +80,22 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid = false;
cutoff = true;
cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
if(alphaDispersionEwald != 0.0f){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
/**---------------------------------------------------------------------------------------
Set the force to use a switching function on the Lennard-Jones interaction.
......@@ -96,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance = distance;
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
......@@ -106,7 +119,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
......@@ -126,9 +139,9 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
......@@ -139,7 +152,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
......@@ -148,9 +161,9 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
numRz = kmaxz;
ewald = true;
tabulateEwaldScaleFactor();
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
......@@ -159,7 +172,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
......@@ -168,10 +181,40 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
meshDim[2] = meshSize[2];
pme = true;
tabulateEwaldScaleFactor();
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
if (alpha != alphaDispersionEwald)
expTableIsValid = false;
alphaDispersionEwald = alpha;
dispersionMeshDim[0] = meshSize[0];
dispersionMeshDim[1] = meshSize[1];
dispersionMeshDim[2] = meshSize[2];
ljpme = true;
tabulateExpTerms();
if(cutoffDistance != 0.0f){
// We set this here, in case setUseLJPME is called after the cutoff is set
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid)
return;
tableIsValid = true;
......@@ -188,8 +231,28 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
}
}
void CpuNonbondedForce::tabulateExpTerms() {
if (expTableIsValid)
return;
expTableIsValid = true;
exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
exptermsDXInv = 1.0f/exptermsDX;
exptermsTable.resize(NUM_TABLE_POINTS+4);
dExptermsTable.resize(NUM_TABLE_POINTS+4);
for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
double r = i*ewaldDX;
double dalphaR = alphaDispersionEwald*r;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
exptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
vector<RealVec>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex;
......@@ -211,6 +274,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if (totalEnergy)
*totalEnergy += recipEnergy;
pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<RealVec> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = (RealOpenMM)C6params[i];
dpmeforces.push_back(RealVec());
}
RealOpenMM recipDispersionEnergy = 0.0;
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
pme_destroy(pmedata);
}
}
// Ewald method
......@@ -224,7 +310,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms);
......@@ -301,13 +387,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
this->posq = posq;
this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0];
this->C6params = &C6params[0];
this->exclusions = &exclusions[0];
this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL);
......@@ -319,7 +406,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this);
threads.execute(task);
threads.execute(task); // ACS calls threadcomputedirect
threads.waitForThreads();
// Signal the threads to subtract the exclusions.
......@@ -350,9 +437,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) {
if (ewald || pme || ljpme) {
// Compute the interactions from the neighbor list.
while (true) {
int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (nextBlock >= neighborList->getNumBlocks())
......@@ -395,6 +481,17 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
}
else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
if (ljpme) {
float C6ij = C6params[i]*C6params[j];
float inverseR2 = 1.0f/r2;
float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
if(includeEnergy)
threadEnergy[threadIndex] += emult;
float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
}
}
}
}
......@@ -476,7 +573,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj);
}
}
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI;
......@@ -502,3 +599,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
}
float CpuNonbondedForce::exptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*exptermsTable[index] + coeff2*exptermsTable[index+1];
}
float CpuNonbondedForce::dExptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*dExptermsTable[index] + coeff2*dExptermsTable[index+1];
}
......@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
#include <algorithm>
#include <iostream>
using namespace std;
using namespace OpenMM;
......@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
......@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
......@@ -278,6 +277,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
fvec4 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
......@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 sig2 = inverseR*sig;
sig2 *= sig2;
fvec4 sig6 = sig2*sig2*sig2;
fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
fvec4 eps = blockAtomEpsilon*atomEpsilon;
fvec4 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
......@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue;
}
if (ljpme) {
fvec4 C6ij = C6s*C6params[atom];
fvec4 inverseR2 = inverseR*inverseR;
fvec4 mysig2 = sig*sig;
fvec4 mysig6 = mysig2*mysig2*mysig2;
fvec4 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec4 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
}
else {
energy = 0.0f;
......@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::exptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&exptermsTable[index[0]]);
fvec4 t2(&exptermsTable[index[1]]);
fvec4 t3(&exptermsTable[index[2]]);
fvec4 t4(&exptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::dExptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&dExptermsTable[index[0]]);
fvec4 t2(&dExptermsTable[index[1]]);
fvec4 t3(&dExptermsTable[index[2]]);
fvec4 t4(&dExptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
......@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <iostream>
using namespace std;
using namespace OpenMM;
......@@ -81,7 +82,6 @@ enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, Periodic
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
......@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
fvec8 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]], C6params[blockAtom[4]], C6params[blockAtom[5]], C6params[blockAtom[6]], C6params[blockAtom[7]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
......@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8 sig2 = inverseR*sig;
sig2 *= sig2;
fvec8 sig6 = sig2*sig2*sig2;
fvec8 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
fvec8 eps = blockAtomEpsilon*atomEpsilon;
fvec8 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
......@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue;
}
if (ljpme) {
fvec8 C6ij = C6s*C6params[atom];
fvec8 inverseR2 = inverseR*inverseR;
fvec8 mysig2 = sig*sig;
fvec8 mysig6 = mysig2*mysig2*mysig2;
fvec8 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec8 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
}
else {
energy = 0.0f;
......@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::exptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&exptermsTable[indexLower[0]]);
fvec4 t2(&exptermsTable[indexLower[1]]);
fvec4 t3(&exptermsTable[indexLower[2]]);
fvec4 t4(&exptermsTable[indexLower[3]]);
fvec4 t5(&exptermsTable[indexUpper[0]]);
fvec4 t6(&exptermsTable[indexUpper[1]]);
fvec4 t7(&exptermsTable[indexUpper[2]]);
fvec4 t8(&exptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::dExptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&dExptermsTable[indexLower[0]]);
fvec4 t2(&dExptermsTable[indexLower[1]]);
fvec4 t3(&dExptermsTable[indexLower[2]]);
fvec4 t4(&dExptermsTable[indexLower[3]]);
fvec4 t5(&dExptermsTable[indexUpper[0]]);
fvec4 t6(&dExptermsTable[indexUpper[1]]);
fvec4 t7(&dExptermsTable[indexUpper[2]]);
fvec4 t8(&dExptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
#endif
......@@ -127,6 +127,8 @@ void CpuPlatform::contextDestroyed(ContextImpl& context) const {
PlatformData* data = contextData[&context];
delete data;
contextData.erase(&context);
ReferencePlatform::PlatformData* refPlatformData = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
delete refPlatformData;
}
CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) {
......
......@@ -599,7 +599,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) {
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL),
pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
}
~CudaCalcNonbondedForceKernel();
/**
......@@ -636,6 +637,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class SortTrait : public CudaSort::SortTrait {
int getDataSize() const {return 8;}
......@@ -667,26 +677,36 @@ private:
CudaArray* pmeEnergyBuffer;
CudaSort* sort;
Kernel cpuPme;
Kernel cpuDispersionPme;
PmeIO* pmeio;
CUstream pmeStream;
CUevent pmeSyncEvent;
CudaFFT3D* fft;
cufftHandle fftForward;
cufftHandle fftBackward;
CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward;
CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel;
CUfunction pmeDispersionGridIndexKernel;
CUfunction pmeSpreadChargeKernel;
CUfunction pmeDispersionSpreadChargeKernel;
CUfunction pmeFinishSpreadChargeKernel;
CUfunction pmeDispersionFinishSpreadChargeKernel;
CUfunction pmeEvalEnergyKernel;
CUfunction pmeEvalDispersionEnergyKernel;
CUfunction pmeConvolutionKernel;
CUfunction pmeDispersionConvolutionKernel;
CUfunction pmeInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines;
CUfunction pmeInterpolateDispersionForceKernel;
std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha;
double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME;
NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5;
};
......@@ -1432,7 +1452,7 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target);
void recordGlobalValue(double value, GlobalTarget target, CustomIntegrator& integrator);
void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step);
CudaContext& cu;
......
......@@ -78,8 +78,9 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param supportsPairList specifies whether this interaction can work with a neighbor list that uses a separate pair list
*/
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup);
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool supportsPairList=false);
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*/
......@@ -189,6 +190,12 @@ public:
CudaArray& getInteractingAtoms() {
return *interactingAtoms;
}
/**
* Get the array containing single pairs in the neighbor list.
*/
CudaArray& getSinglePairs() {
return *singlePairs;
}
/**
* Get the array containing exclusion flags.
*/
......@@ -270,6 +277,8 @@ private:
CudaArray* interactingTiles;
CudaArray* interactingAtoms;
CudaArray* interactionCount;
CudaArray* singlePairs;
CudaArray* singlePairCount;
CudaArray* blockCenter;
CudaArray* blockBoundingBox;
CudaArray* sortedBlocks;
......@@ -288,8 +297,8 @@ private:
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
};
/**
......
......@@ -83,7 +83,7 @@ private:
std::vector<Kernel> kernels;
std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions;
int* tileCounts;
int2* interactionCounts;
CudaArray* contextForces;
void* pinnedPositionBuffer;
long long* pinnedForceBuffer;
......@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class Task;
CudaPlatform::PlatformData& data;
......
......@@ -52,6 +52,7 @@
#include <set>
#include <sstream>
#include <typeinfo>
#include <sys/stat.h>
#include <cudaProfiler.h>
#ifndef WIN32
#include <unistd.h>
......@@ -127,9 +128,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
int res = std::system(testCompilerCommand.c_str());
#endif
isNvccAvailable = (res == 0);
struct stat info;
isNvccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0);
int cudaDriverVersion;
cuDriverGetVersion(&cudaDriverVersion);
static bool hasShownNvccWarning = false;
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning) {
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning && cudaDriverVersion < 8000) {
hasShownNvccWarning = true;
printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. ");
#ifdef WIN32
......@@ -205,14 +209,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
#if __CUDA_API_VERSION < 7000
int numThreadBlocksPerComputeUnit = (major >= 6 ? 4 : 6);
if (cudaDriverVersion < 7000) {
// This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support
// anything beyond 5.0.
if (major == 5)
minor = 0;
#endif
#if __CUDA_API_VERSION < 8000
}
if (cudaDriverVersion < 8000) {
// This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3.
......@@ -220,7 +225,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
major = 5;
minor = 3;
}
#endif
}
gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor;
......@@ -241,7 +246,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment