Commit 3b6925ae authored by Andy Simmonett's avatar Andy Simmonett Committed by GitHub
Browse files

Merge pull request #1 from peastman/ljpme

Cleanup to LJ PME code
parents 5a8a8aa9 f7a102fb
...@@ -129,6 +129,7 @@ private: ...@@ -129,6 +129,7 @@ private:
bool useSwitch; bool useSwitch;
bool periodic; bool periodic;
bool triclinic; bool triclinic;
bool useInteractionGroups;
const CpuNeighborList* neighborList; const CpuNeighborList* neighborList;
float recipBoxSize[3]; float recipBoxSize[3];
RealVec periodicBoxVectors[3]; RealVec periodicBoxVectors[3];
...@@ -183,8 +184,8 @@ public: ...@@ -183,8 +184,8 @@ public:
Lepton::CompiledExpression forceExpression; Lepton::CompiledExpression forceExpression;
std::vector<Lepton::CompiledExpression> energyParamDerivExpressions; std::vector<Lepton::CompiledExpression> energyParamDerivExpressions;
CompiledExpressionSet expressionSet; CompiledExpressionSet expressionSet;
std::vector<int> particleParamIndex; std::vector<double> particleParam;
int rIndex; double r;
std::vector<RealOpenMM> energyParamDerivs; std::vector<RealOpenMM> energyParamDerivs;
}; };
......
...@@ -258,20 +258,30 @@ public: ...@@ -258,20 +258,30 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class PmeIO; class PmeIO;
CpuPlatform::PlatformData& data; CpuPlatform::PlatformData& data;
int numParticles, num14; int numParticles, num14;
int **bonded14IndexArray; int **bonded14IndexArray;
double **bonded14ParamArray; double **bonded14ParamArray;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldSelfEnergy, dispersionCoefficient; double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
int kmax[3], gridSize[3]; int kmax[3], gridSize[3], dispersionGridSize[3];
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme; bool useSwitchingFunction, useOptimizedPme, hasInitializedPme, hasInitializedDispersionPme;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams; std::vector<std::pair<float, float> > particleParams;
std::vector<float> C6params;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNonbondedForce* nonbonded; CpuNonbondedForce* nonbonded;
Kernel optimizedPme; Kernel optimizedPme, optimizedDispersionPme;
CpuBondForce bondForce; CpuBondForce bondForce;
}; };
......
/* Portions copyright (c) 2013-2015 Stanford University and Simbios. /* Portions copyright (c) 2013-2016 Stanford University and Simbios.
* Authors: Peter Eastman * Authors: Peter Eastman
* Contributors: * Contributors:
* *
...@@ -43,12 +43,12 @@ public: ...@@ -43,12 +43,12 @@ public:
* *
* @param numberOfAtoms number of atoms * @param numberOfAtoms number of atoms
* @param deltaT delta t for dynamics * @param deltaT delta t for dynamics
* @param tau viscosity * @param friction friction coefficient
* @param temperature temperature * @param temperature temperature
* @param threads thread pool for parallelizing computation * @param threads thread pool for parallelizing computation
* @param random random number generator * @param random random number generator
*/ */
CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, OpenMM::ThreadPool& threads, OpenMM::CpuRandom& random); CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM friction, RealOpenMM temperature, OpenMM::ThreadPool& threads, OpenMM::CpuRandom& random);
/** /**
* Destructor. * Destructor.
......
...@@ -114,6 +114,17 @@ class CpuNonbondedForce { ...@@ -114,6 +114,17 @@ class CpuNonbondedForce {
void setUsePME(float alpha, int meshSize[3]); void setUsePME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void setUseLJPME(float alpha, int meshSize[3]);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Calculate Ewald ixn Calculate Ewald ixn
...@@ -122,6 +133,7 @@ class CpuNonbondedForce { ...@@ -122,6 +133,7 @@ class CpuNonbondedForce {
@param posq atom coordinates and charges @param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME) @param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon)) @param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param C6Paramrs C6 parameters for multiplicative representation of dispersion
@param exclusions atom exclusion indices @param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added) @param forces force array (forces added)
...@@ -130,8 +142,8 @@ class CpuNonbondedForce { ...@@ -130,8 +142,8 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions, const std::vector<std::pair<float, float> >& atomParameters, const std::vector<float> &C6params,
std::vector<RealVec>& forces, double* totalEnergy) const; const std::vector<std::set<int> >& exclusions, std::vector<RealVec>& forces, double* totalEnergy) const;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -150,7 +162,7 @@ class CpuNonbondedForce { ...@@ -150,7 +162,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters, void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads); const std::vector<float>& C6params, const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
...@@ -163,28 +175,32 @@ protected: ...@@ -163,28 +175,32 @@ protected:
bool periodic; bool periodic;
bool triclinic; bool triclinic;
bool ewald; bool ewald;
bool pme; bool ljpme, pme;
bool tableIsValid; bool tableIsValid, expTableIsValid;
const CpuNeighborList* neighborList; const CpuNeighborList* neighborList;
float recipBoxSize[3]; float recipBoxSize[3];
RealVec periodicBoxVectors[3]; RealVec periodicBoxVectors[3];
AlignedArray<fvec4> periodicBoxVec4; AlignedArray<fvec4> periodicBoxVec4;
float cutoffDistance, switchingDistance; float cutoffDistance, switchingDistance;
float krf, crf; float krf, crf;
float alphaEwald; float alphaEwald, alphaDispersionEwald;
int numRx, numRy, numRz; int numRx, numRy, numRz;
int meshDim[3]; int meshDim[3], dispersionMeshDim[3];
std::vector<float> erfcTable, ewaldScaleTable; std::vector<float> erfcTable, ewaldScaleTable;
float ewaldDX, ewaldDXInv, erfcDXInv; std::vector<float> exptermsTable, dExptermsTable;
float ewaldDX, ewaldDXInv, erfcDXInv, exptermsDX, exptermsDXInv;
std::vector<double> threadEnergy; std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms; int numberOfAtoms;
float* posq; float* posq;
RealVec const* atomCoordinates; RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters; std::pair<float, float> const* atomParameters;
float const *C6params;
std::set<int> const* exclusions; std::set<int> const* exclusions;
std::vector<AlignedArray<float> >* threadForce; std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy; bool includeEnergy;
float inverseRcut6;
float inverseRcut6Expterm;
void* atomicCounter; void* atomicCounter;
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
...@@ -238,10 +254,29 @@ protected: ...@@ -238,10 +254,29 @@ protected:
*/ */
void tabulateEwaldScaleFactor(); void tabulateEwaldScaleFactor();
/**
* Create a lookup table for the scale factor used with dispersion PME.
*/
void tabulateExpTerms();
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
float erfcApprox(float x); float erfcApprox(float x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
float exptermsApprox(float R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
float dExptermsApprox(float R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -93,6 +93,20 @@ protected: ...@@ -93,6 +93,20 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/ */
fvec4 ewaldScaleFunction(const fvec4& x); fvec4 ewaldScaleFunction(const fvec4& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec4 exptermsApprox(const fvec4& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec4 dExptermsApprox(const fvec4& R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -92,6 +92,21 @@ protected: ...@@ -92,6 +92,21 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/ */
fvec8 ewaldScaleFunction(const fvec8& x); fvec8 ewaldScaleFunction(const fvec8& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec8 exptermsApprox(const fvec8& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec8 dExptermsApprox(const fvec8& R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -59,47 +59,68 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa ...@@ -59,47 +59,68 @@ CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threa
energyGradientExpressions(energyGradientExpressions), energyParamDerivExpressions(energyParamDerivExpressions) { energyGradientExpressions(energyGradientExpressions), energyParamDerivExpressions(energyParamDerivExpressions) {
firstAtom = (threadIndex*(long long) numAtoms)/numThreads; firstAtom = (threadIndex*(long long) numAtoms)/numThreads;
lastAtom = ((threadIndex+1)*(long long) numAtoms)/numThreads; lastAtom = ((threadIndex+1)*(long long) numAtoms)/numThreads;
for (int i = 0; i < (int) valueExpressions.size(); i++) map<string, double*> variableLocations;
variableLocations["x"] = &x;
variableLocations["y"] = &y;
variableLocations["z"] = &z;
variableLocations["r"] = &r;
param.resize(parameterNames.size());
particleParam.resize(parameterNames.size()*2);
for (int i = 0; i < (int) parameterNames.size(); i++) {
variableLocations[parameterNames[i]] = &param[i];
for (int j = 0; j < 2; j++) {
stringstream name;
name << parameterNames[i] << (j+1);
variableLocations[name.str()] = &particleParam[2*i+j];
}
}
value.resize(valueNames.size());
particleValue.resize(valueNames.size()*2);
for (int i = 0; i < (int) valueNames.size(); i++) {
variableLocations[valueNames[i]] = &value[i];
for (int j = 0; j < 2; j++) {
stringstream name;
name << valueNames[i] << (j+1);
variableLocations[name.str()] = &particleValue[2*i+j];
}
}
for (int i = 0; i < (int) valueExpressions.size(); i++) {
this->valueExpressions[i].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueExpressions[i]); expressionSet.registerExpression(this->valueExpressions[i]);
}
for (int i = 0; i < (int) valueDerivExpressions.size(); i++) for (int i = 0; i < (int) valueDerivExpressions.size(); i++)
for (int j = 0; j < (int) valueDerivExpressions[i].size(); j++) for (int j = 0; j < (int) valueDerivExpressions[i].size(); j++) {
this->valueDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueDerivExpressions[i][j]); expressionSet.registerExpression(this->valueDerivExpressions[i][j]);
}
for (int i = 0; i < (int) valueGradientExpressions.size(); i++) for (int i = 0; i < (int) valueGradientExpressions.size(); i++)
for (int j = 0; j < (int) valueGradientExpressions[i].size(); j++) for (int j = 0; j < (int) valueGradientExpressions[i].size(); j++) {
this->valueGradientExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueGradientExpressions[i][j]); expressionSet.registerExpression(this->valueGradientExpressions[i][j]);
}
for (int i = 0; i < (int) valueParamDerivExpressions.size(); i++) for (int i = 0; i < (int) valueParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) valueParamDerivExpressions[i].size(); j++) for (int j = 0; j < (int) valueParamDerivExpressions[i].size(); j++) {
this->valueParamDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->valueParamDerivExpressions[i][j]); expressionSet.registerExpression(this->valueParamDerivExpressions[i][j]);
for (int i = 0; i < (int) energyExpressions.size(); i++) }
for (int i = 0; i < (int) energyExpressions.size(); i++) {
this->energyExpressions[i].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyExpressions[i]); expressionSet.registerExpression(this->energyExpressions[i]);
}
for (int i = 0; i < (int) energyDerivExpressions.size(); i++) for (int i = 0; i < (int) energyDerivExpressions.size(); i++)
for (int j = 0; j < (int) energyDerivExpressions[i].size(); j++) for (int j = 0; j < (int) energyDerivExpressions[i].size(); j++) {
this->energyDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyDerivExpressions[i][j]); expressionSet.registerExpression(this->energyDerivExpressions[i][j]);
}
for (int i = 0; i < (int) energyGradientExpressions.size(); i++) for (int i = 0; i < (int) energyGradientExpressions.size(); i++)
for (int j = 0; j < (int) energyGradientExpressions[i].size(); j++) for (int j = 0; j < (int) energyGradientExpressions[i].size(); j++) {
this->energyGradientExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyGradientExpressions[i][j]); expressionSet.registerExpression(this->energyGradientExpressions[i][j]);
}
for (int i = 0; i < (int) energyParamDerivExpressions.size(); i++) for (int i = 0; i < (int) energyParamDerivExpressions.size(); i++)
for (int j = 0; j < (int) energyParamDerivExpressions[i].size(); j++) for (int j = 0; j < (int) energyParamDerivExpressions[i].size(); j++) {
this->energyParamDerivExpressions[i][j].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyParamDerivExpressions[i][j]); expressionSet.registerExpression(this->energyParamDerivExpressions[i][j]);
xindex = expressionSet.getVariableIndex("x");
yindex = expressionSet.getVariableIndex("y");
zindex = expressionSet.getVariableIndex("z");
rindex = expressionSet.getVariableIndex("r");
for (int i = 0; i < (int) parameterNames.size(); i++) {
paramIndex.push_back(expressionSet.getVariableIndex(parameterNames[i]));
for (int j = 1; j < 3; j++) {
stringstream name;
name << parameterNames[i] << j;
particleParamIndex.push_back(expressionSet.getVariableIndex(name.str()));
}
}
for (int i = 0; i < (int) valueNames.size(); i++) {
valueIndex.push_back(expressionSet.getVariableIndex(valueNames[i]));
for (int j = 1; j < 3; j++) {
stringstream name;
name << valueNames[i] << j;
particleValueIndex.push_back(expressionSet.getVariableIndex(name.str()));
}
} }
value0.resize(numAtoms); value0.resize(numAtoms);
dEdV.resize(valueNames.size()); dEdV.resize(valueNames.size());
...@@ -283,13 +304,13 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex) ...@@ -283,13 +304,13 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
for (int j = 0; j < (int) threadData.size(); j++) for (int j = 0; j < (int) threadData.size(); j++)
sum += threadData[j]->value0[atom]; sum += threadData[j]->value0[atom];
values[0][atom] = sum; values[0][atom] = sum;
data.expressionSet.setVariable(data.xindex, posq[4*atom]); data.x = posq[4*atom];
data.expressionSet.setVariable(data.yindex, posq[4*atom+1]); data.y = posq[4*atom+1];
data.expressionSet.setVariable(data.zindex, posq[4*atom+2]); data.z = posq[4*atom+2];
for (int j = 0; j < numParams; j++) for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[atom][j]); data.param[j] = atomParameters[atom][j];
for (int i = 1; i < numValues; i++) { for (int i = 1; i < numValues; i++) {
data.expressionSet.setVariable(data.valueIndex[i-1], values[i-1][atom]); data.value[i-1] = values[i-1][atom];
values[i][atom] = (float) data.valueExpressions[i].evaluate(); values[i][atom] = (float) data.valueExpressions[i].evaluate();
// Calculate derivatives with respect to parameters. // Calculate derivatives with respect to parameters.
...@@ -397,15 +418,14 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th ...@@ -397,15 +418,14 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
getDeltaR(pos2, pos1, deltaR, r2, periodic, boxSize, invBoxSize); getDeltaR(pos2, pos1, deltaR, r2, periodic, boxSize, invBoxSize);
if (cutoff && r2 >= cutoffDistance2) if (cutoff && r2 >= cutoffDistance2)
return; return;
float r = sqrtf(r2); data.r = sqrtf(r2);
for (int i = 0; i < numParams; i++) { for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]); data.particleParam[i*2] = atomParameters[atom1][i];
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]); data.particleParam[i*2+1] = atomParameters[atom2][i];
} }
data.expressionSet.setVariable(data.rindex, r);
for (int i = 0; i < index; i++) { for (int i = 0; i < index; i++) {
data.expressionSet.setVariable(data.particleValueIndex[i*2], values[i][atom1]); data.particleValue[i*2] = values[i][atom1];
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]); data.particleValue[i*2+1] = values[i][atom2];
} }
valueArray[atom1] += (float) data.valueExpressions[index].evaluate(); valueArray[atom1] += (float) data.valueExpressions[index].evaluate();
...@@ -418,13 +438,13 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th ...@@ -418,13 +438,13 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq,
RealOpenMM** atomParameters, float* forces, double& totalEnergy) { RealOpenMM** atomParameters, float* forces, double& totalEnergy) {
for (int i = data.firstAtom; i < data.lastAtom; i++) { for (int i = data.firstAtom; i < data.lastAtom; i++) {
data.expressionSet.setVariable(data.xindex, posq[4*i]); data.x = posq[4*i];
data.expressionSet.setVariable(data.yindex, posq[4*i+1]); data.y = posq[4*i+1];
data.expressionSet.setVariable(data.zindex, posq[4*i+2]); data.z = posq[4*i+2];
for (int j = 0; j < numParams; j++) for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]); data.param[j] = atomParameters[i][j];
for (int j = 0; j < (int) values.size(); j++) for (int j = 0; j < (int) values.size(); j++)
data.expressionSet.setVariable(data.valueIndex[j], values[j][i]); data.value[j] = values[j][i];
if (includeEnergy) if (includeEnergy)
totalEnergy += (float) data.energyExpressions[index].evaluate(); totalEnergy += (float) data.energyExpressions[index].evaluate();
for (int j = 0; j < (int) values.size(); j++) for (int j = 0; j < (int) values.size(); j++)
...@@ -494,17 +514,17 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom ...@@ -494,17 +514,17 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom
if (cutoff && r2 >= cutoffDistance2) if (cutoff && r2 >= cutoffDistance2)
return; return;
float r = sqrtf(r2); float r = sqrtf(r2);
data.r = r;
// Record variables for evaluating expressions. // Record variables for evaluating expressions.
for (int i = 0; i < numParams; i++) { for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]); data.particleParam[i*2] = atomParameters[atom1][i];
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]); data.particleParam[i*2+1] = atomParameters[atom2][i];
} }
data.expressionSet.setVariable(data.rindex, r);
for (int i = 0; i < (int) values.size(); i++) { for (int i = 0; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.particleValueIndex[i*2], values[i][atom1]); data.particleValue[i*2] = values[i][atom1];
data.expressionSet.setVariable(data.particleValueIndex[i*2+1], values[i][atom2]); data.particleValue[i*2+1] = values[i][atom2];
} }
// Evaluate the energy and its derivatives. // Evaluate the energy and its derivatives.
...@@ -571,13 +591,13 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, ...@@ -571,13 +591,13 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
// Compute chain rule terms for computed values that depend explicitly on particle coordinates. // Compute chain rule terms for computed values that depend explicitly on particle coordinates.
for (int i = data.firstAtom; i < data.lastAtom; i++) { for (int i = data.firstAtom; i < data.lastAtom; i++) {
data.expressionSet.setVariable(data.xindex, posq[4*i]); data.x = posq[4*i];
data.expressionSet.setVariable(data.yindex, posq[4*i+1]); data.y = posq[4*i+1];
data.expressionSet.setVariable(data.zindex, posq[4*i+2]); data.z = posq[4*i+2];
for (int j = 0; j < numParams; j++) for (int j = 0; j < numParams; j++)
data.expressionSet.setVariable(data.paramIndex[j], atomParameters[i][j]); data.param[j] = atomParameters[i][j];
for (int j = 1; j < (int) values.size(); j++) { for (int j = 1; j < (int) values.size(); j++) {
data.expressionSet.setVariable(data.valueIndex[j-1], values[j-1][i]); data.value[j-1] = values[j-1][i];
data.dVdX[j] = 0.0; data.dVdX[j] = 0.0;
data.dVdY[j] = 0.0; data.dVdY[j] = 0.0;
data.dVdZ[j] = 0.0; data.dVdZ[j] = 0.0;
...@@ -599,7 +619,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, ...@@ -599,7 +619,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
// Compute chain rule terms for derivatives with respect to parameters. // Compute chain rule terms for derivatives with respect to parameters.
for (int i = data.firstAtom; i < data.lastAtom; i++) for (int i = data.firstAtom; i < data.lastAtom; i++)
for (int j = 0; j < data.valueIndex.size(); j++) for (int j = 0; j < data.value.size(); j++)
for (int k = 0; k < dValuedParam[j].size(); k++) for (int k = 0; k < dValuedParam[j].size(); k++)
data.energyParamDerivs[k] += dEdV[j][i]*dValuedParam[j][k][i]; data.energyParamDerivs[k] += dEdV[j][i]*dValuedParam[j][k][i];
} }
...@@ -616,21 +636,21 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat ...@@ -616,21 +636,21 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
if (cutoff && r2 >= cutoffDistance2) if (cutoff && r2 >= cutoffDistance2)
return; return;
float r = sqrtf(r2); float r = sqrtf(r2);
data.r = r;
// Record variables for evaluating expressions. // Record variables for evaluating expressions.
for (int i = 0; i < numParams; i++) { for (int i = 0; i < numParams; i++) {
data.expressionSet.setVariable(data.particleParamIndex[i*2], atomParameters[atom1][i]); data.particleParam[i*2] = atomParameters[atom1][i];
data.expressionSet.setVariable(data.particleParamIndex[i*2+1], atomParameters[atom2][i]); data.particleParam[i*2+1] = atomParameters[atom2][i];
data.expressionSet.setVariable(data.paramIndex[i], atomParameters[atom1][i]); data.param[i] = atomParameters[atom1][i];
} }
data.expressionSet.setVariable(data.valueIndex[0], values[0][atom1]); data.value[0] = values[0][atom1];
data.expressionSet.setVariable(data.xindex, posq[4*atom1]); data.x = posq[4*atom1];
data.expressionSet.setVariable(data.yindex, posq[4*atom1+1]); data.y = posq[4*atom1+1];
data.expressionSet.setVariable(data.zindex, posq[4*atom1+2]); data.z = posq[4*atom1+2];
data.expressionSet.setVariable(data.rindex, r); data.particleValue[0] = values[0][atom1];
data.expressionSet.setVariable(data.particleValueIndex[0], values[0][atom1]); data.particleValue[1] = values[0][atom2];
data.expressionSet.setVariable(data.particleValueIndex[1], values[0][atom2]);
// Evaluate the derivative of each parameter with respect to position and apply forces. // Evaluate the derivative of each parameter with respect to position and apply forces.
...@@ -644,7 +664,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat ...@@ -644,7 +664,7 @@ void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadDat
f2 -= deltaR*(dEdV[0][atom1]*data.dVdR2[0]); f2 -= deltaR*(dEdV[0][atom1]*data.dVdR2[0]);
} }
for (int i = 1; i < (int) values.size(); i++) { for (int i = 1; i < (int) values.size(); i++) {
data.expressionSet.setVariable(data.valueIndex[i], values[i][atom1]); data.value[i] = values[i][atom1];
data.dVdR1[i] = 0.0; data.dVdR1[i] = 0.0;
data.dVdR2[i] = 0.0; data.dVdR2[i] = 0.0;
for (int j = 0; j < i; j++) { for (int j = 0; j < i; j++) {
......
...@@ -46,25 +46,31 @@ public: ...@@ -46,25 +46,31 @@ public:
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) : const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) { energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
expressionSet.registerExpression(this->energyExpression); map<string, double*> variableLocations;
expressionSet.registerExpression(this->forceExpression); variableLocations["r"] = &r;
for (int i = 0; i < this->energyParamDerivExpressions.size(); i++) particleParam.resize(2*parameterNames.size());
expressionSet.registerExpression(this->energyParamDerivExpressions[i]);
rIndex = expressionSet.getVariableIndex("r");
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
for (int j = 1; j < 3; j++) { for (int j = 0; j < 2; j++) {
stringstream name; stringstream name;
name << parameterNames[i] << j; name << parameterNames[i] << (j+1);
particleParamIndex.push_back(expressionSet.getVariableIndex(name.str())); variableLocations[name.str()] = &particleParam[i*2+j];
} }
} }
energyParamDerivs.resize(energyParamDerivExpressions.size()); energyParamDerivs.resize(energyParamDerivExpressions.size());
this->energyExpression.setVariableLocations(variableLocations);
this->forceExpression.setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyExpression);
expressionSet.registerExpression(this->forceExpression);
for (int i = 0; i < this->energyParamDerivExpressions.size(); i++) {
this->energyParamDerivExpressions[i].setVariableLocations(variableLocations);
expressionSet.registerExpression(this->energyParamDerivExpressions[i]);
}
} }
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression, CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions, const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) : const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, ThreadPool& threads) :
cutoff(false), useSwitch(false), periodic(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) { cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), paramNames(parameterNames), exclusions(exclusions), threads(threads) {
for (int i = 0; i < threads.getNumThreads(); i++) for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions)); threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions));
} }
...@@ -81,6 +87,7 @@ void CpuCustomNonbondedForce::setUseCutoff(RealOpenMM distance, const CpuNeighbo ...@@ -81,6 +87,7 @@ void CpuCustomNonbondedForce::setUseCutoff(RealOpenMM distance, const CpuNeighbo
} }
void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) { void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) {
useInteractionGroups = true;
for (int group = 0; group < (int) groups.size(); group++) { for (int group = 0; group < (int) groups.size(); group++) {
const set<int>& set1 = groups[group].first; const set<int>& set1 = groups[group].first;
const set<int>& set2 = groups[group].second; const set<int>& set2 = groups[group].second;
...@@ -177,7 +184,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -177,7 +184,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
data.energyParamDerivs[i] = 0.0; data.energyParamDerivs[i] = 0.0;
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (groupInteractions.size() > 0) { if (useInteractionGroups) {
// The user has specified interaction groups, so compute only the requested interactions. // The user has specified interaction groups, so compute only the requested interactions.
while (true) { while (true) {
...@@ -187,8 +194,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -187,8 +194,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
int atom1 = groupInteractions[i].first; int atom1 = groupInteractions[i].first;
int atom2 = groupInteractions[i].second; int atom2 = groupInteractions[i].second;
for (int j = 0; j < (int) paramNames.size(); j++) { for (int j = 0; j < (int) paramNames.size(); j++) {
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[atom1][j]); data.particleParam[j*2] = atomParameters[atom1][j];
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[atom2][j]); data.particleParam[j*2+1] = atomParameters[atom2][j];
} }
calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize); calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize);
} }
...@@ -207,12 +214,12 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -207,12 +214,12 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i]; int first = neighbors[i];
for (int j = 0; j < (int) paramNames.size(); j++) for (int j = 0; j < (int) paramNames.size(); j++)
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[first][j]); data.particleParam[j*2] = atomParameters[first][j];
for (int k = 0; k < blockSize; k++) { for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) { if ((exclusions[i] & (1<<k)) == 0) {
int second = blockAtom[k]; int second = blockAtom[k];
for (int j = 0; j < (int) paramNames.size(); j++) for (int j = 0; j < (int) paramNames.size(); j++)
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[second][j]); data.particleParam[j*2+1] = atomParameters[second][j];
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize); calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize);
} }
} }
...@@ -229,8 +236,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -229,8 +236,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
for (int jj = ii+1; jj < numberOfAtoms; jj++) { for (int jj = ii+1; jj < numberOfAtoms; jj++) {
if (exclusions[jj].find(ii) == exclusions[jj].end()) { if (exclusions[jj].find(ii) == exclusions[jj].end()) {
for (int j = 0; j < (int) paramNames.size(); j++) { for (int j = 0; j < (int) paramNames.size(); j++) {
data.expressionSet.setVariable(data.particleParamIndex[j*2], atomParameters[ii][j]); data.particleParam[j*2] = atomParameters[ii][j];
data.expressionSet.setVariable(data.particleParamIndex[j*2+1], atomParameters[jj][j]); data.particleParam[j*2+1] = atomParameters[jj][j];
} }
calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize); calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize);
} }
...@@ -251,10 +258,10 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data, ...@@ -251,10 +258,10 @@ void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
if (cutoff && r2 >= cutoffDistance*cutoffDistance) if (cutoff && r2 >= cutoffDistance*cutoffDistance)
return; return;
float r = sqrtf(r2); float r = sqrtf(r2);
data.r = r;
// accumulate forces // accumulate forces
data.expressionSet.setVariable(data.rIndex, r);
double dEdR = (includeForce ? data.forceExpression.evaluate()/r : 0.0); double dEdR = (includeForce ? data.forceExpression.evaluate()/r : 0.0);
double energy = (includeEnergy ? data.energyExpression.evaluate() : 0.0); double energy = (includeEnergy ? data.energyExpression.evaluate() : 0.0);
double switchValue = 1.0; double switchValue = 1.0;
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios.
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -89,6 +88,10 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f ...@@ -89,6 +88,10 @@ void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, f
particleParams = params; particleParams = params;
bornRadii.resize(params.size()+3); bornRadii.resize(params.size()+3);
obcChain.resize(params.size()+3); obcChain.resize(params.size()+3);
for (int i = bornRadii.size()-3; i < bornRadii.size(); i++) {
bornRadii[i] = 0;
obcChain[i] = 0;
}
} }
void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) { void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include "lepton/CustomFunction.h" #include "lepton/CustomFunction.h"
#include "lepton/Operation.h" #include "lepton/Operation.h"
#include "lepton/Parser.h" #include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
using namespace OpenMM; using namespace OpenMM;
...@@ -528,7 +529,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4(); ...@@ -528,7 +529,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8(); CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform), CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) { data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
if (isVec8Supported()) if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8(); nonbonded = createCpuNonbondedForceVec8();
else else
...@@ -575,12 +576,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -575,12 +576,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++) for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3]; bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles); particleParams.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge; data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
...@@ -616,19 +619,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -616,19 +619,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
} }
else if (nonbondedMethod == PME) { else if (nonbondedMethod == PME) {
double alpha; double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]); NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha; ewaldAlpha = alpha;
} }
if (nonbondedMethod == Ewald || nonbondedMethod == PME) else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = (RealOpenMM) alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = (RealOpenMM) alpha;
useSwitchingFunction = false;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI); ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
else if(nonbondedMethod == LJPME){
for (int atom = 0; atom < numParticles; atom++) {
// Dispersion self term
ewaldSelfEnergy += pow(ewaldDispersionAlpha, 6.0) * C6params[atom]*C6params[atom] / 12.0;
}
}
} else {
ewaldSelfEnergy = 0.0; ewaldSelfEnergy = 0.0;
}
rfDielectric = force.getReactionFieldDielectric(); rfDielectric = force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection()) if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force); dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME); data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
} }
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
...@@ -646,6 +665,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -646,6 +665,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
} }
} }
if (nonbondedMethod == LJPME) {
// If available, use the optimized PME implementation.
vector<string> kernelNames;
kernelNames.push_back("CalcPmeReciprocalForce");
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
}
}
} }
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context); vector<RealVec>& posData = extractPositions(context);
...@@ -654,6 +687,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -654,6 +687,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric); nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) { if (data.isPeriodic) {
...@@ -669,9 +703,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -669,9 +703,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded->setUsePME(ewaldAlpha, gridSize); nonbonded->setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction) if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance); nonbonded->setUseSwitchingFunction(switchingDistance);
if (ljpme){
nonbonded->setUsePME(ewaldAlpha, gridSize);
nonbonded->setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
double nonbondedEnergy = 0; double nonbondedEnergy = 0;
if (includeDirect) if (includeDirect)
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads); nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles); PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
...@@ -680,13 +718,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -680,13 +718,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io); nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
} }
else else
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL); nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
} }
energy += nonbondedEnergy; energy += nonbondedEnergy;
if (includeDirect) { if (includeDirect) {
ReferenceLJCoulomb14 nonbonded14; ReferenceLJCoulomb14 nonbonded14;
bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14); bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic) if (data.isPeriodic && nonbondedMethod != LJPME)
energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]); energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
} }
return energy; return energy;
...@@ -739,7 +777,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -739,7 +777,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
} }
void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME) if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME"); throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme) if (useOptimizedPme)
optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz); optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
...@@ -751,6 +789,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ...@@ -751,6 +789,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
} }
} }
void CpuCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedDispersionPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = ewaldDispersionAlpha;
nx = dispersionGridSize[0];
ny = dispersionGridSize[1];
nz = dispersionGridSize[2];
}
}
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) { CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) {
} }
...@@ -1285,8 +1336,7 @@ void CpuIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langevi ...@@ -1285,8 +1336,7 @@ void CpuIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langevi
if (dynamics) if (dynamics)
delete dynamics; delete dynamics;
RealOpenMM tau = (friction == 0.0 ? 0.0 : 1.0/friction); dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, friction, temperature, data.threads, data.random);
dynamics = new CpuLangevinDynamics(context.getSystem().getNumParticles(), stepSize, tau, temperature, data.threads, data.random);
dynamics->setReferenceConstraintAlgorithm(&extractConstraints(context)); dynamics->setReferenceConstraintAlgorithm(&extractConstraints(context));
prevTemp = temperature; prevTemp = temperature;
prevFriction = friction; prevFriction = friction;
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2016 Stanford University and Simbios.
* Authors: Peter Eastman * Authors: Peter Eastman
* Contributors: * Contributors:
* *
...@@ -59,8 +59,8 @@ public: ...@@ -59,8 +59,8 @@ public:
CpuLangevinDynamics& owner; CpuLangevinDynamics& owner;
}; };
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) : CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM friction, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, tau, temperature), threads(threads), random(random) { ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
} }
CpuLangevinDynamics::~CpuLangevinDynamics() { CpuLangevinDynamics::~CpuLangevinDynamics() {
...@@ -120,11 +120,12 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo ...@@ -120,11 +120,12 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCo
} }
void CpuLangevinDynamics::threadUpdate1(int threadIndex) { void CpuLangevinDynamics::threadUpdate1(int threadIndex) {
const RealOpenMM tau = getTau(); RealOpenMM dt = getDeltaT();
const RealOpenMM vscale = EXP(-getDeltaT()/tau); RealOpenMM friction = getFriction();
const RealOpenMM fscale = (1-vscale)*tau; const RealOpenMM vscale = EXP(-dt*friction);
const RealOpenMM fscale = (friction == 0 ? dt : (1-vscale)/friction);
const RealOpenMM kT = BOLTZ*getTemperature(); const RealOpenMM kT = BOLTZ*getTemperature();
const RealOpenMM noisescale = SQRT(2*kT/tau)*SQRT(0.5*(1-vscale*vscale)*tau); const RealOpenMM noisescale = SQRT(kT*(1-vscale*vscale));
int start = threadIndex*numberOfAtoms/threads.getNumThreads(); int start = threadIndex*numberOfAtoms/threads.getNumThreads();
int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads(); int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ReferencePME.h" #include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h" #include "openmm/internal/gmx_atomic.h"
#include <algorithm> #include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -57,7 +58,8 @@ public: ...@@ -57,7 +58,8 @@ public:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) { CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
} }
CpuNonbondedForce::~CpuNonbondedForce() { CpuNonbondedForce::~CpuNonbondedForce() {
...@@ -78,11 +80,22 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig ...@@ -78,11 +80,22 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid = false; tableIsValid = false;
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors; neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0); krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0); crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
if(alphaDispersionEwald != 0.0f){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
} }
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use a switching function on the Lennard-Jones interaction. Set the force to use a switching function on the Lennard-Jones interaction.
...@@ -96,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -96,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance = distance; switchingDistance = distance;
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff also been set, and the smallest side of the periodic box is at least twice the cutoff
...@@ -106,7 +119,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -106,7 +119,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) { void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
assert(cutoff); assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
...@@ -126,9 +139,9 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -126,9 +139,9 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 || triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 || periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0); periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Ewald summation. Set the force to use Ewald summation.
...@@ -139,7 +152,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -139,7 +152,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) { void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
...@@ -148,9 +161,9 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -148,9 +161,9 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
numRz = kmaxz; numRz = kmaxz;
ewald = true; ewald = true;
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation. Set the force to use Particle-Mesh Ewald (PME) summation.
...@@ -159,7 +172,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -159,7 +172,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) { void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
...@@ -168,10 +181,40 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -168,10 +181,40 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
meshDim[2] = meshSize[2]; meshDim[2] = meshSize[2];
pme = true; pme = true;
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
if (alpha != alphaDispersionEwald)
expTableIsValid = false;
alphaDispersionEwald = alpha;
dispersionMeshDim[0] = meshSize[0];
dispersionMeshDim[1] = meshSize[1];
dispersionMeshDim[2] = meshSize[2];
ljpme = true;
tabulateExpTerms();
if(cutoffDistance != 0.0f){
// We set this here, in case setUseLJPME is called after the cutoff is set
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
} }
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() { void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid) if (tableIsValid)
return; return;
tableIsValid = true; tableIsValid = true;
...@@ -188,8 +231,28 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -188,8 +231,28 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
} }
} }
void CpuNonbondedForce::tabulateExpTerms() {
if (expTableIsValid)
return;
expTableIsValid = true;
exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
exptermsDXInv = 1.0f/exptermsDX;
exptermsTable.resize(NUM_TABLE_POINTS+4);
dExptermsTable.resize(NUM_TABLE_POINTS+4);
for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
double r = i*ewaldDX;
double dalphaR = alphaDispersionEwald*r;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
exptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions, const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
vector<RealVec>& forces, double* totalEnergy) const { vector<RealVec>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex; typedef std::complex<float> d_complex;
...@@ -211,6 +274,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -211,6 +274,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if (totalEnergy) if (totalEnergy)
*totalEnergy += recipEnergy; *totalEnergy += recipEnergy;
pme_destroy(pmedata); pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<RealVec> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = (RealOpenMM)C6params[i];
dpmeforces.push_back(RealVec());
}
RealOpenMM recipDispersionEnergy = 0.0;
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
pme_destroy(pmedata);
}
} }
// Ewald method // Ewald method
...@@ -224,7 +310,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -224,7 +310,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors // setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z] #define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3); vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms); vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms); vector<d_complex> tab_qxyz(numberOfAtoms);
...@@ -301,13 +387,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -301,13 +387,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters, void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) { const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
this->posq = posq; this->posq = posq;
this->atomCoordinates = &atomCoordinates[0]; this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0]; this->atomParameters = &atomParameters[0];
this->C6params = &C6params[0];
this->exclusions = &exclusions[0]; this->exclusions = &exclusions[0];
this->threadForce = &threadForce; this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
...@@ -319,7 +406,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -319,7 +406,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this); ComputeDirectTask task(*this);
threads.execute(task); threads.execute(task); // ACS calls threadcomputedirect
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to subtract the exclusions. // Signal the threads to subtract the exclusions.
...@@ -350,9 +437,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -350,9 +437,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float* forces = &(*threadForce)[threadIndex][0]; float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) { if (ewald || pme || ljpme) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
while (true) { while (true) {
int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1); int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (nextBlock >= neighborList->getNumBlocks()) if (nextBlock >= neighborList->getNumBlocks())
...@@ -395,6 +481,17 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -395,6 +481,17 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
} }
else if (includeEnergy) else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3]; threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
if (ljpme) {
float C6ij = C6params[i]*C6params[j];
float inverseR2 = 1.0f/r2;
float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
if(includeEnergy)
threadEnergy[threadIndex] += emult;
float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
}
} }
} }
} }
...@@ -476,7 +573,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -476,7 +573,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii); (fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj); (fvec4(forces+4*jj)-result).store(forces+4*jj);
} }
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI; deltaR = posJ-posI;
...@@ -502,3 +599,18 @@ float CpuNonbondedForce::erfcApprox(float x) { ...@@ -502,3 +599,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return coeff1*erfcTable[index] + coeff2*erfcTable[index+1]; return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
} }
float CpuNonbondedForce::exptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*exptermsTable[index] + coeff2*exptermsTable[index+1];
}
float CpuNonbondedForce::dExptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*dExptermsTable[index] + coeff2*dExptermsTable[index+1];
}
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h" #include "CpuNonbondedForceVec4.h"
#include <algorithm> #include <algorithm>
#include <iostream>
using namespace std; using namespace std;
using namespace OpenMM; using namespace OpenMM;
...@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions. // Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType; PeriodicType periodicType;
fvec4 blockCenter; fvec4 blockCenter;
if (!periodic) { if (!periodic) {
...@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces ...@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template <int PERIODIC_TYPE> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) { void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4]; fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
...@@ -278,6 +277,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -278,6 +277,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]); fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first); fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second); fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
fvec4 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic); const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
...@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 sig2 = inverseR*sig; fvec4 sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
fvec4 sig6 = sig2*sig2*sig2; fvec4 sig6 = sig2*sig2*sig2;
fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6; fvec4 eps = blockAtomEpsilon*atomEpsilon;
fvec4 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
...@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r; dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue; energy *= switchValue;
} }
if (ljpme) {
fvec4 C6ij = C6s*C6params[atom];
fvec4 inverseR2 = inverseR*inverseR;
fvec4 mysig2 = sig*sig;
fvec4 mysig6 = mysig2*mysig2*mysig2;
fvec4 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec4 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
} }
else { else {
energy = 0.0f; energy = 0.0f;
...@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) { ...@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose(t1, t2, t3, t4); transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2; return coeff1*t1 + coeff2*t2;
} }
fvec4 CpuNonbondedForceVec4::exptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&exptermsTable[index[0]]);
fvec4 t2(&exptermsTable[index[1]]);
fvec4 t3(&exptermsTable[index[2]]);
fvec4 t4(&exptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::dExptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&dExptermsTable[index[0]]);
fvec4 t2(&dExptermsTable[index[1]]);
fvec4 t3(&dExptermsTable[index[2]]);
fvec4 t4(&dExptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
#include <algorithm> #include <algorithm>
#include <iostream>
using namespace std; using namespace std;
using namespace OpenMM; using namespace OpenMM;
...@@ -81,7 +82,6 @@ enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, Periodic ...@@ -81,7 +82,6 @@ enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, Periodic
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions. // Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType; PeriodicType periodicType;
fvec4 blockCenter; fvec4 blockCenter;
if (!periodic) { if (!periodic) {
...@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge *= ONE_4PI_EPS0; blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first); fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second); fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
fvec8 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]], C6params[blockAtom[4]], C6params[blockAtom[5]], C6params[blockAtom[6]], C6params[blockAtom[7]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic); const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
...@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8 sig2 = inverseR*sig; fvec8 sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
fvec8 sig6 = sig2*sig2*sig2; fvec8 sig6 = sig2*sig2*sig2;
fvec8 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6; fvec8 eps = blockAtomEpsilon*atomEpsilon;
fvec8 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
...@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r; dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue; energy *= switchValue;
} }
if (ljpme) {
fvec8 C6ij = C6s*C6params[atom];
fvec8 inverseR2 = inverseR*inverseR;
fvec8 mysig2 = sig*sig;
fvec8 mysig6 = mysig2*mysig2*mysig2;
fvec8 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec8 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
} }
else { else {
energy = 0.0f; energy = 0.0f;
...@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) { ...@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4); transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2; return coeff1*s1 + coeff2*s2;
} }
fvec8 CpuNonbondedForceVec8::exptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&exptermsTable[indexLower[0]]);
fvec4 t2(&exptermsTable[indexLower[1]]);
fvec4 t3(&exptermsTable[indexLower[2]]);
fvec4 t4(&exptermsTable[indexLower[3]]);
fvec4 t5(&exptermsTable[indexUpper[0]]);
fvec4 t6(&exptermsTable[indexUpper[1]]);
fvec4 t7(&exptermsTable[indexUpper[2]]);
fvec4 t8(&exptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::dExptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&dExptermsTable[indexLower[0]]);
fvec4 t2(&dExptermsTable[indexLower[1]]);
fvec4 t3(&dExptermsTable[indexLower[2]]);
fvec4 t4(&dExptermsTable[indexLower[3]]);
fvec4 t5(&dExptermsTable[indexUpper[0]]);
fvec4 t6(&dExptermsTable[indexUpper[1]]);
fvec4 t7(&dExptermsTable[indexUpper[2]]);
fvec4 t8(&dExptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
#endif #endif
...@@ -127,6 +127,8 @@ void CpuPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -127,6 +127,8 @@ void CpuPlatform::contextDestroyed(ContextImpl& context) const {
PlatformData* data = contextData[&context]; PlatformData* data = contextData[&context];
delete data; delete data;
contextData.erase(&context); contextData.erase(&context);
ReferencePlatform::PlatformData* refPlatformData = reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData());
delete refPlatformData;
} }
CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) { CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) {
......
...@@ -599,7 +599,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel { ...@@ -599,7 +599,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform), CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL), cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) { pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL),
pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
} }
~CudaCalcNonbondedForceKernel(); ~CudaCalcNonbondedForceKernel();
/** /**
...@@ -636,6 +637,15 @@ public: ...@@ -636,6 +637,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class SortTrait : public CudaSort::SortTrait { class SortTrait : public CudaSort::SortTrait {
int getDataSize() const {return 8;} int getDataSize() const {return 8;}
...@@ -667,26 +677,36 @@ private: ...@@ -667,26 +677,36 @@ private:
CudaArray* pmeEnergyBuffer; CudaArray* pmeEnergyBuffer;
CudaSort* sort; CudaSort* sort;
Kernel cpuPme; Kernel cpuPme;
Kernel cpuDispersionPme;
PmeIO* pmeio; PmeIO* pmeio;
CUstream pmeStream; CUstream pmeStream;
CUevent pmeSyncEvent; CUevent pmeSyncEvent;
CudaFFT3D* fft; CudaFFT3D* fft;
cufftHandle fftForward; cufftHandle fftForward;
cufftHandle fftBackward; cufftHandle fftBackward;
CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward;
CUfunction ewaldSumsKernel; CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel; CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel; CUfunction pmeGridIndexKernel;
CUfunction pmeDispersionGridIndexKernel;
CUfunction pmeSpreadChargeKernel; CUfunction pmeSpreadChargeKernel;
CUfunction pmeDispersionSpreadChargeKernel;
CUfunction pmeFinishSpreadChargeKernel; CUfunction pmeFinishSpreadChargeKernel;
CUfunction pmeDispersionFinishSpreadChargeKernel;
CUfunction pmeEvalEnergyKernel; CUfunction pmeEvalEnergyKernel;
CUfunction pmeEvalDispersionEnergyKernel;
CUfunction pmeConvolutionKernel; CUfunction pmeConvolutionKernel;
CUfunction pmeDispersionConvolutionKernel;
CUfunction pmeInterpolateForceKernel; CUfunction pmeInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines; CUfunction pmeInterpolateDispersionForceKernel;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha; double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int interpolateForceThreads; int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT; int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
...@@ -1432,7 +1452,7 @@ private: ...@@ -1432,7 +1452,7 @@ private:
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context); Lepton::ExpressionTreeNode replaceDerivFunctions(const Lepton::ExpressionTreeNode& node, OpenMM::ContextImpl& context);
void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes); void findExpressionsForDerivs(const Lepton::ExpressionTreeNode& node, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variableNodes);
void recordGlobalValue(double value, GlobalTarget target); void recordGlobalValue(double value, GlobalTarget target, CustomIntegrator& integrator);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step); bool evaluateCondition(int step);
CudaContext& cu; CudaContext& cu;
......
...@@ -78,8 +78,9 @@ public: ...@@ -78,8 +78,9 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded * @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction * @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated * @param forceGroup the force group in which the interaction should be calculated
* @param supportsPairList specifies whether this interaction can work with a neighbor list that uses a separate pair list
*/ */
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup); void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool supportsPairList=false);
/** /**
* Add a per-atom parameter that the default interaction kernel may depend on. * Add a per-atom parameter that the default interaction kernel may depend on.
*/ */
...@@ -189,6 +190,12 @@ public: ...@@ -189,6 +190,12 @@ public:
CudaArray& getInteractingAtoms() { CudaArray& getInteractingAtoms() {
return *interactingAtoms; return *interactingAtoms;
} }
/**
* Get the array containing single pairs in the neighbor list.
*/
CudaArray& getSinglePairs() {
return *singlePairs;
}
/** /**
* Get the array containing exclusion flags. * Get the array containing exclusion flags.
*/ */
...@@ -270,6 +277,8 @@ private: ...@@ -270,6 +277,8 @@ private:
CudaArray* interactingTiles; CudaArray* interactingTiles;
CudaArray* interactingAtoms; CudaArray* interactingAtoms;
CudaArray* interactionCount; CudaArray* interactionCount;
CudaArray* singlePairs;
CudaArray* singlePairCount;
CudaArray* blockCenter; CudaArray* blockCenter;
CudaArray* blockBoundingBox; CudaArray* blockBoundingBox;
CudaArray* sortedBlocks; CudaArray* sortedBlocks;
...@@ -288,8 +297,8 @@ private: ...@@ -288,8 +297,8 @@ private:
std::map<int, double> groupCutoff; std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags; int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
}; };
/** /**
......
...@@ -83,7 +83,7 @@ private: ...@@ -83,7 +83,7 @@ private:
std::vector<Kernel> kernels; std::vector<Kernel> kernels;
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions; std::vector<double> contextNonbondedFractions;
int* tileCounts; int2* interactionCounts;
CudaArray* contextForces; CudaArray* contextForces;
void* pinnedPositionBuffer; void* pinnedPositionBuffer;
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
...@@ -439,6 +439,15 @@ public: ...@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class Task; class Task;
CudaPlatform::PlatformData& data; CudaPlatform::PlatformData& data;
......
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include <set> #include <set>
#include <sstream> #include <sstream>
#include <typeinfo> #include <typeinfo>
#include <sys/stat.h>
#include <cudaProfiler.h> #include <cudaProfiler.h>
#ifndef WIN32 #ifndef WIN32
#include <unistd.h> #include <unistd.h>
...@@ -127,9 +128,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -127,9 +128,12 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null"; string testCompilerCommand = this->compiler+" --version > /dev/null 2> /dev/null";
int res = std::system(testCompilerCommand.c_str()); int res = std::system(testCompilerCommand.c_str());
#endif #endif
isNvccAvailable = (res == 0); struct stat info;
isNvccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0);
int cudaDriverVersion;
cuDriverGetVersion(&cudaDriverVersion);
static bool hasShownNvccWarning = false; static bool hasShownNvccWarning = false;
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning) { if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning && cudaDriverVersion < 8000) {
hasShownNvccWarning = true; hasShownNvccWarning = true;
printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. "); printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. ");
#ifdef WIN32 #ifdef WIN32
...@@ -205,14 +209,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -205,14 +209,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int major, minor; int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device)); CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
#if __CUDA_API_VERSION < 7000 int numThreadBlocksPerComputeUnit = (major >= 6 ? 4 : 6);
if (cudaDriverVersion < 7000) {
// This is a workaround to support GTX 980 with CUDA 6.5. It reports // This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support // its compute capability as 5.2, but the compiler doesn't support
// anything beyond 5.0. // anything beyond 5.0.
if (major == 5) if (major == 5)
minor = 0; minor = 0;
#endif }
#if __CUDA_API_VERSION < 8000 if (cudaDriverVersion < 8000) {
// This is a workaround to support Pascal with CUDA 7.5. It reports // This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support // its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3. // anything beyond 5.3.
...@@ -220,7 +225,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -220,7 +225,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
major = 5; major = 5;
minor = 3; minor = 3;
} }
#endif }
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor; computeCapability = major+0.1*minor;
...@@ -241,7 +246,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -241,7 +246,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize; numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
int multiprocessors; int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device)); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors; numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
if (useDoublePrecision) { if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq"); posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment