Commit a9f65649 authored by Peter Eastman's avatar Peter Eastman
Browse files

Merge branch 'dpme' of https://github.com/andysim/openmm into ljpme

parents 9567ddb3 58b6e3b6
...@@ -555,7 +555,8 @@ public: ...@@ -555,7 +555,8 @@ public:
CutoffNonPeriodic = 1, CutoffNonPeriodic = 1,
CutoffPeriodic = 2, CutoffPeriodic = 2,
Ewald = 3, Ewald = 3,
PME = 4 PME = 4,
LJPME = 5
}; };
static std::string Name() { static std::string Name() {
return "CalcNonbondedForce"; return "CalcNonbondedForce";
...@@ -589,13 +590,22 @@ public: ...@@ -589,13 +590,22 @@ public:
virtual void copyParametersToContext(ContextImpl& context, const NonbondedForce& force) = 0; virtual void copyParametersToContext(ContextImpl& context, const NonbondedForce& force) = 0;
/** /**
* Get the parameters being used for PME. * Get the parameters being used for PME.
* *
* @param alpha the separation parameter * @param alpha the separation parameter
* @param nx the number of grid points along the X axis * @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis * @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
virtual void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const = 0; virtual void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const = 0;
/**
* Get the parameters being used for the dispersion terms in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
virtual void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const = 0;
}; };
/** /**
...@@ -1335,6 +1345,57 @@ public: ...@@ -1335,6 +1345,57 @@ public:
}; };
/**
* This kernel performs the dispersion reciprocal space calculation for LJPME. In most cases, this
* calculation is done directly by CalcNonbondedForceKernel so this kernel is unneeded.
* In some cases it may want to outsource the work to a different kernel. In particular,
* GPU based platforms sometimes use a CPU based implementation provided by a separate
* plugin.
*/
class CalcDispersionPmeReciprocalForceKernel : public KernelImpl {
public:
class IO;
static std::string Name() {
return "CalcDispersionPmeReciprocalForce";
}
CalcDispersionPmeReciprocalForceKernel(std::string name, const Platform& platform) : KernelImpl(name, platform) {
}
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxVectors the vectors defining the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
virtual void beginComputation(IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) = 0;
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
virtual double finishComputation(IO& io) = 0;
/**
* Get the parameters being used for PME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
virtual void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const = 0;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_KERNELS_H_*/ #endif /*OPENMM_KERNELS_H_*/
...@@ -109,7 +109,12 @@ public: ...@@ -109,7 +109,12 @@ public:
* Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle * Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle
* with all periodic copies of every other particle. * with all periodic copies of every other particle.
*/ */
PME = 4 PME = 4,
/**
* Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle
* with all periodic copies of every other particle for both electrostatics and dispersion. No switching is used for either interaction.
*/
LJPME = 5
}; };
/** /**
* Create a NonbondedForce. * Create a NonbondedForce.
...@@ -207,6 +212,16 @@ public: ...@@ -207,6 +212,16 @@ public:
* @param[out] nz the number of grid points along the Z axis * @param[out] nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters to use for dispersion term in LJ-PME calculations. If alpha is 0 (the default),
* these parameters are ignored and instead their values are chosen based on the Ewald error tolerance.
*
* @param[out] alpha the separation parameter
* @param[out] nx the number of dispersion grid points along the X axis
* @param[out] ny the number of dispersion grid points along the Y axis
* @param[out] nz the number of dispersion grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/** /**
* Set the parameters to use for PME calculations. If alpha is 0 (the default), these parameters are * Set the parameters to use for PME calculations. If alpha is 0 (the default), these parameters are
* ignored and instead their values are chosen based on the Ewald error tolerance. * ignored and instead their values are chosen based on the Ewald error tolerance.
...@@ -217,6 +232,16 @@ public: ...@@ -217,6 +232,16 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void setPMEParameters(double alpha, int nx, int ny, int nz); void setPMEParameters(double alpha, int nx, int ny, int nz);
/**
* Set the parameters to use for the dispersion term in LJPME calculations. If alpha is 0 (the default),
* these parameters are ignored and instead their values are chosen based on the Ewald error tolerance.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void setLJPMEParameters(double alpha, int nx, int ny, int nz);
/** /**
* Get the parameters being used for PME in a particular Context. Because some platforms have restrictions * Get the parameters being used for PME in a particular Context. Because some platforms have restrictions
* on the allowed grid sizes, the values that are actually used may be slightly different from those * on the allowed grid sizes, the values that are actually used may be slightly different from those
...@@ -230,6 +255,19 @@ public: ...@@ -230,6 +255,19 @@ public:
* @param[out] nz the number of grid points along the Z axis * @param[out] nz the number of grid points along the Z axis
*/ */
void getPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const; void getPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the PME parameters being used for the dispersion term for LJPME in a particular Context. Because some
* platforms have restrictions on the allowed grid sizes, the values that are actually used may be slightly different
* from those specified with setPMEParameters(), or the standard values calculated based on the Ewald error tolerance.
* See the manual for details.
*
* @param context the Context for which to get the parameters
* @param[out] alpha the separation parameter
* @param[out] nx the number of grid points along the X axis
* @param[out] ny the number of grid points along the Y axis
* @param[out] nz the number of grid points along the Z axis
*/
void getLJPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const;
/** /**
* Add the nonbonded force parameters for a particle. This should be called once for each particle * Add the nonbonded force parameters for a particle. This should be called once for each particle
* in the System. When it is called for the i'th time, it specifies the parameters for the i'th particle. * in the System. When it is called for the i'th time, it specifies the parameters for the i'th particle.
...@@ -382,9 +420,9 @@ private: ...@@ -382,9 +420,9 @@ private:
class ParticleInfo; class ParticleInfo;
class ExceptionInfo; class ExceptionInfo;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
double cutoffDistance, switchingDistance, rfDielectric, ewaldErrorTol, alpha; double cutoffDistance, switchingDistance, rfDielectric, ewaldErrorTol, alpha, dalpha;
bool useSwitchingFunction, useDispersionCorrection; bool useSwitchingFunction, useDispersionCorrection;
int recipForceGroup, nx, ny, nz; int recipForceGroup, nx, ny, nz, dnx, dny, dnz;
void addExclusionsToSet(const std::vector<std::set<int> >& bonded12, std::set<int>& exclusions, int baseParticle, int fromParticle, int currentLevel) const; void addExclusionsToSet(const std::vector<std::set<int> >& bonded12, std::set<int>& exclusions, int baseParticle, int fromParticle, int currentLevel) const;
std::vector<ParticleInfo> particles; std::vector<ParticleInfo> particles;
std::vector<ExceptionInfo> exceptions; std::vector<ExceptionInfo> exceptions;
......
...@@ -65,6 +65,7 @@ public: ...@@ -65,6 +65,7 @@ public:
std::vector<std::string> getKernelNames(); std::vector<std::string> getKernelNames();
void updateParametersInContext(ContextImpl& context); void updateParametersInContext(ContextImpl& context);
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/** /**
* This is a utility routine that calculates the values to use for alpha and kmax when using * This is a utility routine that calculates the values to use for alpha and kmax when using
* Ewald summation. * Ewald summation.
...@@ -74,7 +75,7 @@ public: ...@@ -74,7 +75,7 @@ public:
* This is a utility routine that calculates the values to use for alpha and grid size when using * This is a utility routine that calculates the values to use for alpha and grid size when using
* Particle Mesh Ewald. * Particle Mesh Ewald.
*/ */
static void calcPMEParameters(const System& system, const NonbondedForce& force, double& alpha, int& xsize, int& ysize, int& zsize); static void calcPMEParameters(const System& system, const NonbondedForce& force, double& alpha, int& xsize, int& ysize, int& zsize, bool LJ);
/** /**
* Compute the coefficient which, when divided by the periodic box volume, gives the * Compute the coefficient which, when divided by the periodic box volume, gives the
* long range dispersion correction to the energy. * long range dispersion correction to the energy.
......
...@@ -106,6 +106,13 @@ void NonbondedForce::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) ...@@ -106,6 +106,13 @@ void NonbondedForce::getPMEParameters(double& alpha, int& nx, int& ny, int& nz)
nz = this->nz; nz = this->nz;
} }
void NonbondedForce::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
alpha = this->dalpha;
nx = this->dnx;
ny = this->dny;
nz = this->dnz;
}
void NonbondedForce::setPMEParameters(double alpha, int nx, int ny, int nz) { void NonbondedForce::setPMEParameters(double alpha, int nx, int ny, int nz) {
this->alpha = alpha; this->alpha = alpha;
this->nx = nx; this->nx = nx;
...@@ -113,10 +120,21 @@ void NonbondedForce::setPMEParameters(double alpha, int nx, int ny, int nz) { ...@@ -113,10 +120,21 @@ void NonbondedForce::setPMEParameters(double alpha, int nx, int ny, int nz) {
this->nz = nz; this->nz = nz;
} }
void NonbondedForce::setLJPMEParameters(double alpha, int nx, int ny, int nz) {
this->dalpha = alpha;
this->dnx = nx;
this->dny = ny;
this->dnz = nz;
}
void NonbondedForce::getPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const { void NonbondedForce::getPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const NonbondedForceImpl&>(getImplInContext(context)).getPMEParameters(alpha, nx, ny, nz); dynamic_cast<const NonbondedForceImpl&>(getImplInContext(context)).getPMEParameters(alpha, nx, ny, nz);
} }
void NonbondedForce::getLJPMEParametersInContext(const Context& context, double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const NonbondedForceImpl&>(getImplInContext(context)).getLJPMEParameters(alpha, nx, ny, nz);
}
int NonbondedForce::addParticle(double charge, double sigma, double epsilon) { int NonbondedForce::addParticle(double charge, double sigma, double epsilon) {
particles.push_back(ParticleInfo(charge, sigma, epsilon)); particles.push_back(ParticleInfo(charge, sigma, epsilon));
return particles.size()-1; return particles.size()-1;
......
...@@ -151,8 +151,12 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond ...@@ -151,8 +151,12 @@ void NonbondedForceImpl::calcEwaldParameters(const System& system, const Nonbond
kmaxz++; kmaxz++;
} }
void NonbondedForceImpl::calcPMEParameters(const System& system, const NonbondedForce& force, double& alpha, int& xsize, int& ysize, int& zsize) { void NonbondedForceImpl::calcPMEParameters(const System& system, const NonbondedForce& force, double& alpha, int& xsize, int& ysize, int& zsize, bool LJ) {
force.getPMEParameters(alpha, xsize, ysize, zsize); if(LJ) {
force.getLJPMEParameters(alpha, xsize, ysize, zsize);
} else {
force.getPMEParameters(alpha, xsize, ysize, zsize);
}
if (alpha == 0.0) { if (alpha == 0.0) {
Vec3 boxVectors[3]; Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]); system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
...@@ -283,3 +287,7 @@ void NonbondedForceImpl::updateParametersInContext(ContextImpl& context) { ...@@ -283,3 +287,7 @@ void NonbondedForceImpl::updateParametersInContext(ContextImpl& context) {
void NonbondedForceImpl::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { void NonbondedForceImpl::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
kernel.getAs<CalcNonbondedForceKernel>().getPMEParameters(alpha, nx, ny, nz); kernel.getAs<CalcNonbondedForceKernel>().getPMEParameters(alpha, nx, ny, nz);
} }
void NonbondedForceImpl::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
kernel.getAs<CalcNonbondedForceKernel>().getLJPMEParameters(alpha, nx, ny, nz);
}
...@@ -251,27 +251,37 @@ public: ...@@ -251,27 +251,37 @@ public:
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force); void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
/** /**
* Get the parameters being used for PME. * Get the parameters being used for PME.
* *
* @param alpha the separation parameter * @param alpha the separation parameter
* @param nx the number of grid points along the X axis * @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis * @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class PmeIO; class PmeIO;
CpuPlatform::PlatformData& data; CpuPlatform::PlatformData& data;
int numParticles, num14; int numParticles, num14;
int **bonded14IndexArray; int **bonded14IndexArray;
double **bonded14ParamArray; double **bonded14ParamArray;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldSelfEnergy, dispersionCoefficient; double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
int kmax[3], gridSize[3]; int kmax[3], gridSize[3], dispersionGridSize[3];
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme; bool useSwitchingFunction, useOptimizedPme, hasInitializedPme, hasInitializedDispersionPme;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams; std::vector<std::pair<float, float> > particleParams;
std::vector<float> C6params;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNonbondedForce* nonbonded; CpuNonbondedForce* nonbonded;
Kernel optimizedPme; Kernel optimizedPme, optimizedDispersionPme;
CpuBondForce bondForce; CpuBondForce bondForce;
}; };
......
...@@ -104,16 +104,27 @@ class CpuNonbondedForce { ...@@ -104,16 +104,27 @@ class CpuNonbondedForce {
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation. Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter @param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh @param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void setUsePME(float alpha, int meshSize[3]); void setUsePME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void setUseLJPME(float alpha, int meshSize[3]);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Calculate Ewald ixn Calculate Ewald ixn
...@@ -122,16 +133,17 @@ class CpuNonbondedForce { ...@@ -122,16 +133,17 @@ class CpuNonbondedForce {
@param posq atom coordinates and charges @param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME) @param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon)) @param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param C6Paramrs C6 parameters for multiplicative representation of dispersion
@param exclusions atom exclusion indices @param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added) @param forces force array (forces added)
@param totalEnergy total energy @param totalEnergy total energy
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions, const std::vector<std::pair<float, float> >& atomParameters, const std::vector<float> &C6params,
std::vector<RealVec>& forces, double* totalEnergy) const; const std::vector<std::set<int> >& exclusions, std::vector<RealVec>& forces, double* totalEnergy) const;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -150,7 +162,7 @@ class CpuNonbondedForce { ...@@ -150,7 +162,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters, void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads); const std::vector<float>& C6params, const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
...@@ -163,28 +175,32 @@ protected: ...@@ -163,28 +175,32 @@ protected:
bool periodic; bool periodic;
bool triclinic; bool triclinic;
bool ewald; bool ewald;
bool pme; bool ljpme, pme;
bool tableIsValid; bool tableIsValid, expTableIsValid;
const CpuNeighborList* neighborList; const CpuNeighborList* neighborList;
float recipBoxSize[3]; float recipBoxSize[3];
RealVec periodicBoxVectors[3]; RealVec periodicBoxVectors[3];
AlignedArray<fvec4> periodicBoxVec4; AlignedArray<fvec4> periodicBoxVec4;
float cutoffDistance, switchingDistance; float cutoffDistance, switchingDistance;
float krf, crf; float krf, crf;
float alphaEwald; float alphaEwald, alphaDispersionEwald;
int numRx, numRy, numRz; int numRx, numRy, numRz;
int meshDim[3]; int meshDim[3], dispersionMeshDim[3];
std::vector<float> erfcTable, ewaldScaleTable; std::vector<float> erfcTable, ewaldScaleTable;
float ewaldDX, ewaldDXInv, erfcDXInv; std::vector<float> exptermsTable, dExptermsTable;
float ewaldDX, ewaldDXInv, erfcDXInv, exptermsDX, exptermsDXInv;
std::vector<double> threadEnergy; std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms; int numberOfAtoms;
float* posq; float* posq;
RealVec const* atomCoordinates; RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters; std::pair<float, float> const* atomParameters;
float const *C6params;
std::set<int> const* exclusions; std::set<int> const* exclusions;
std::vector<AlignedArray<float> >* threadForce; std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy; bool includeEnergy;
float inverseRcut6;
float inverseRcut6Expterm;
void* atomicCounter; void* atomicCounter;
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
...@@ -238,10 +254,29 @@ protected: ...@@ -238,10 +254,29 @@ protected:
*/ */
void tabulateEwaldScaleFactor(); void tabulateEwaldScaleFactor();
/**
* Create a lookup table for the scale factor used with dispersion PME.
*/
void tabulateExpTerms();
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
float erfcApprox(float x); float erfcApprox(float x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
float exptermsApprox(float R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
float dExptermsApprox(float R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -88,11 +88,25 @@ protected: ...@@ -88,11 +88,25 @@ protected:
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
fvec4 erfcApprox(const fvec4& x); fvec4 erfcApprox(const fvec4& x);
/** /**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/ */
fvec4 ewaldScaleFunction(const fvec4& x); fvec4 ewaldScaleFunction(const fvec4& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec4 exptermsApprox(const fvec4& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec4 dExptermsApprox(const fvec4& R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -92,6 +92,21 @@ protected: ...@@ -92,6 +92,21 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/ */
fvec8 ewaldScaleFunction(const fvec8& x); fvec8 ewaldScaleFunction(const fvec8& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec8 exptermsApprox(const fvec8& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec8 dExptermsApprox(const fvec8& R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include "lepton/CustomFunction.h" #include "lepton/CustomFunction.h"
#include "lepton/Operation.h" #include "lepton/Operation.h"
#include "lepton/Parser.h" #include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
using namespace OpenMM; using namespace OpenMM;
...@@ -528,7 +529,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4(); ...@@ -528,7 +529,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8(); CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform), CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) { data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
if (isVec8Supported()) if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8(); nonbonded = createCpuNonbondedForceVec8();
else else
...@@ -575,12 +576,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -575,12 +576,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++) for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3]; bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles); particleParams.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge; data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
...@@ -616,19 +619,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -616,19 +619,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
} }
else if (nonbondedMethod == PME) { else if (nonbondedMethod == PME) {
double alpha; double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]); NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha; ewaldAlpha = alpha;
} }
if (nonbondedMethod == Ewald || nonbondedMethod == PME) else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = (RealOpenMM) alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = (RealOpenMM) alpha;
useSwitchingFunction = false;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI); ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
else if(nonbondedMethod == LJPME){
for (int atom = 0; atom < numParticles; atom++) {
// Dispersion self term
ewaldSelfEnergy += pow(ewaldDispersionAlpha, 6.0) * C6params[atom]*C6params[atom] / 12.0;
}
}
} else {
ewaldSelfEnergy = 0.0; ewaldSelfEnergy = 0.0;
}
rfDielectric = force.getReactionFieldDielectric(); rfDielectric = force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection()) if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force); dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME); data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
} }
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
...@@ -646,6 +665,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -646,6 +665,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
} }
} }
if (nonbondedMethod == LJPME) {
// If available, use the optimized PME implementation.
vector<string> kernelNames;
kernelNames.push_back("CalcPmeReciprocalForce");
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
}
}
} }
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context); vector<RealVec>& posData = extractPositions(context);
...@@ -654,6 +687,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -654,6 +687,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric); nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) { if (data.isPeriodic) {
...@@ -669,9 +703,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -669,9 +703,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded->setUsePME(ewaldAlpha, gridSize); nonbonded->setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction) if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance); nonbonded->setUseSwitchingFunction(switchingDistance);
if (ljpme){
nonbonded->setUsePME(ewaldAlpha, gridSize);
nonbonded->setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
double nonbondedEnergy = 0; double nonbondedEnergy = 0;
if (includeDirect) if (includeDirect)
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads); nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles); PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
...@@ -680,13 +718,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -680,13 +718,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io); nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
} }
else else
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL); nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
} }
energy += nonbondedEnergy; energy += nonbondedEnergy;
if (includeDirect) { if (includeDirect) {
ReferenceLJCoulomb14 nonbonded14; ReferenceLJCoulomb14 nonbonded14;
bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14); bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic) if (data.isPeriodic && nonbondedMethod != LJPME)
energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]); energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
} }
return energy; return energy;
...@@ -739,7 +777,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -739,7 +777,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
} }
void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME) if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME"); throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme) if (useOptimizedPme)
optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz); optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
...@@ -751,6 +789,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ...@@ -751,6 +789,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
} }
} }
void CpuCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedDispersionPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = ewaldDispersionAlpha;
nx = dispersionGridSize[0];
ny = dispersionGridSize[1];
nz = dispersionGridSize[2];
}
}
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) { CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) {
} }
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ReferencePME.h" #include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h" #include "openmm/internal/gmx_atomic.h"
#include <algorithm> #include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -57,7 +58,8 @@ public: ...@@ -57,7 +58,8 @@ public:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) { CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
} }
CpuNonbondedForce::~CpuNonbondedForce() { CpuNonbondedForce::~CpuNonbondedForce() {
...@@ -78,10 +80,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig ...@@ -78,10 +80,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid = false; tableIsValid = false;
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors; neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0); krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0); crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
} if(alphaDispersionEwald != 0.0f){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -96,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -96,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance = distance; switchingDistance = distance;
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff also been set, and the smallest side of the periodic box is at least twice the cutoff
...@@ -106,7 +119,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -106,7 +119,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) { void CpuNonbondedForce::setPeriodic(RealVec* periodicBoxVectors) {
assert(cutoff); assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
...@@ -124,11 +137,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -124,11 +137,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0); periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0); periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 || triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 || periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0); periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Ewald summation. Set the force to use Ewald summation.
...@@ -139,18 +152,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -139,18 +152,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) { void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
numRx = kmaxx; numRx = kmaxx;
numRy = kmaxy; numRy = kmaxy;
numRz = kmaxz; numRz = kmaxz;
ewald = true; ewald = true;
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation. Set the force to use Particle-Mesh Ewald (PME) summation.
...@@ -159,19 +172,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -159,19 +172,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) { void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
meshDim[0] = meshSize[0]; meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1]; meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2]; meshDim[2] = meshSize[2];
pme = true; pme = true;
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
} }
void CpuNonbondedForce::tabulateEwaldScaleFactor() { /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
if (alpha != alphaDispersionEwald)
expTableIsValid = false;
alphaDispersionEwald = alpha;
dispersionMeshDim[0] = meshSize[0];
dispersionMeshDim[1] = meshSize[1];
dispersionMeshDim[2] = meshSize[2];
ljpme = true;
tabulateExpTerms();
if(cutoffDistance != 0.0f){
// We set this here, in case setUseLJPME is called after the cutoff is set
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid) if (tableIsValid)
return; return;
tableIsValid = true; tableIsValid = true;
...@@ -187,10 +230,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -187,10 +230,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR); ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
} }
} }
void CpuNonbondedForce::tabulateExpTerms() {
if (expTableIsValid)
return;
expTableIsValid = true;
exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
exptermsDXInv = 1.0f/exptermsDX;
exptermsTable.resize(NUM_TABLE_POINTS+4);
dExptermsTable.resize(NUM_TABLE_POINTS+4);
for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
double r = i*ewaldDX;
double dalphaR = alphaDispersionEwald*r;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
exptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions, const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
vector<RealVec>& forces, double* totalEnergy) const { vector<RealVec>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex; typedef std::complex<float> d_complex;
static const float epsilon = 1.0; static const float epsilon = 1.0;
...@@ -211,6 +274,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -211,6 +274,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if (totalEnergy) if (totalEnergy)
*totalEnergy += recipEnergy; *totalEnergy += recipEnergy;
pme_destroy(pmedata); pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<RealVec> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = (RealOpenMM)C6params[i];
dpmeforces.push_back(RealVec());
}
RealOpenMM recipDispersionEnergy = 0.0;
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
pme_destroy(pmedata);
}
} }
// Ewald method // Ewald method
...@@ -224,7 +310,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -224,7 +310,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors // setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z] #define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3); vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms); vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms); vector<d_complex> tab_qxyz(numberOfAtoms);
...@@ -232,15 +318,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -232,15 +318,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
for (int i = 0; (i < numberOfAtoms); i++) { for (int i = 0; (i < numberOfAtoms); i++) {
float* pos = posq+4*i; float* pos = posq+4*i;
for (int m = 0; (m < 3); m++) for (int m = 0; (m < 3); m++)
EIR(0, i, m) = d_complex(1,0); EIR(0, i, m) = d_complex(1,0);
for (int m=0; (m<3); m++) for (int m=0; (m<3); m++)
EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]), EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
sin(pos[m]*recipBoxSize[m])); sin(pos[m]*recipBoxSize[m]));
for (int j=2; (j<kmax); j++) for (int j=2; (j<kmax); j++)
for (int m=0; (m<3); m++) for (int m=0; (m<3); m++)
EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m); EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
} }
// calculate reciprocal space energy and forces // calculate reciprocal space energy and forces
...@@ -254,11 +340,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -254,11 +340,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
float ky = ry * recipBoxSize[1]; float ky = ry * recipBoxSize[1];
if (ry >= 0) { if (ry >= 0) {
for (int n = 0; n < numberOfAtoms; n++) for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1); tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
} }
else { else {
for (int n = 0; n < numberOfAtoms; n++) for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1)); tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
} }
for (int rz = lowrz; rz < numRz; rz++) { for (int rz = lowrz; rz < numRz; rz++) {
if (rz >= 0) { if (rz >= 0) {
...@@ -301,13 +387,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -301,13 +387,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters, void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) { const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
this->posq = posq; this->posq = posq;
this->atomCoordinates = &atomCoordinates[0]; this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0]; this->atomParameters = &atomParameters[0];
this->C6params = &C6params[0];
this->exclusions = &exclusions[0]; this->exclusions = &exclusions[0];
this->threadForce = &threadForce; this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
...@@ -319,7 +406,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -319,7 +406,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this); ComputeDirectTask task(*this);
threads.execute(task); threads.execute(task); // ACS calls threadcomputedirect
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to subtract the exclusions. // Signal the threads to subtract the exclusions.
...@@ -350,9 +437,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -350,9 +437,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float* forces = &(*threadForce)[threadIndex][0]; float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) { if (ewald || pme || ljpme) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
while (true) { while (true) {
int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1); int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (nextBlock >= neighborList->getNumBlocks()) if (nextBlock >= neighborList->getNumBlocks())
...@@ -370,7 +456,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -370,7 +456,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
break; break;
int end = min(start+groupSize, numberOfAtoms); int end = min(start+groupSize, numberOfAtoms);
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f); fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]); float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) { for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) { if (*iter > i) {
...@@ -394,7 +480,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -394,7 +480,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR; threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
} }
else if (includeEnergy) else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3]; threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
if (ljpme) {
float C6ij = C6params[i]*C6params[j];
float inverseR2 = 1.0f/r2;
float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
if(includeEnergy)
threadEnergy[threadIndex] += emult;
float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
}
} }
} }
} }
...@@ -444,7 +541,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -444,7 +541,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
} }
float sig = atomParameters[ii].first + atomParameters[jj].first; float sig = atomParameters[ii].first + atomParameters[jj].first;
float sig2 = inverseR*sig; float sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
float sig6 = sig2*sig2*sig2; float sig6 = sig2*sig2*sig2;
float eps = atomParameters[ii].second*atomParameters[jj].second; float eps = atomParameters[ii].second*atomParameters[jj].second;
...@@ -476,7 +573,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -476,7 +573,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii); (fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj); (fvec4(forces+4*jj)-result).store(forces+4*jj);
} }
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI; deltaR = posJ-posI;
...@@ -502,3 +599,18 @@ float CpuNonbondedForce::erfcApprox(float x) { ...@@ -502,3 +599,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return coeff1*erfcTable[index] + coeff2*erfcTable[index+1]; return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
} }
float CpuNonbondedForce::exptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*exptermsTable[index] + coeff2*exptermsTable[index+1];
}
float CpuNonbondedForce::dExptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*dExptermsTable[index] + coeff2*dExptermsTable[index+1];
}
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h" #include "CpuNonbondedForceVec4.h"
#include <algorithm> #include <algorithm>
#include <iostream>
using namespace std; using namespace std;
using namespace OpenMM; using namespace OpenMM;
...@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces, ...@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions. // Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType; PeriodicType periodicType;
fvec4 blockCenter; fvec4 blockCenter;
if (!periodic) { if (!periodic) {
...@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces ...@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template <int PERIODIC_TYPE> template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) { void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4]; fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
...@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]); fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first); fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second); fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
fvec4 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic); const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block. // Loop over neighbors for this block.
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
...@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 sig2 = inverseR*sig; fvec4 sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
fvec4 sig6 = sig2*sig2*sig2; fvec4 sig6 = sig2*sig2*sig2;
fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6; fvec4 eps = blockAtomEpsilon*atomEpsilon;
fvec4 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
...@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r; dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue; energy *= switchValue;
} }
if (ljpme) {
fvec4 C6ij = C6s*C6params[atom];
fvec4 inverseR2 = inverseR*inverseR;
fvec4 mysig2 = sig*sig;
fvec4 mysig6 = mysig2*mysig2*mysig2;
fvec4 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec4 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
} }
else { else {
energy = 0.0f; energy = 0.0f;
...@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
} }
// Record the forces on the block atoms. // Record the forces on the block atoms.
fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f}; fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
transpose(f[0], f[1], f[2], f[3]); transpose(f[0], f[1], f[2], f[3]);
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
...@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) { ...@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose(t1, t2, t3, t4); transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2; return coeff1*t1 + coeff2*t2;
} }
fvec4 CpuNonbondedForceVec4::exptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&exptermsTable[index[0]]);
fvec4 t2(&exptermsTable[index[1]]);
fvec4 t3(&exptermsTable[index[2]]);
fvec4 t4(&exptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::dExptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&dExptermsTable[index[0]]);
fvec4 t2(&dExptermsTable[index[1]]);
fvec4 t3(&dExptermsTable[index[2]]);
fvec4 t4(&dExptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
#include <algorithm> #include <algorithm>
#include <iostream>
using namespace std; using namespace std;
using namespace OpenMM; using namespace OpenMM;
...@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() { ...@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic}; enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) { void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions. // Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType; PeriodicType periodicType;
fvec4 blockCenter; fvec4 blockCenter;
if (!periodic) { if (!periodic) {
...@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge *= ONE_4PI_EPS0; blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first); fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second); fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
fvec8 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]], C6params[blockAtom[4]], C6params[blockAtom[5]], C6params[blockAtom[6]], C6params[blockAtom[7]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic); const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
...@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8 sig2 = inverseR*sig; fvec8 sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
fvec8 sig6 = sig2*sig2*sig2; fvec8 sig6 = sig2*sig2*sig2;
fvec8 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6; fvec8 eps = blockAtomEpsilon*atomEpsilon;
fvec8 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f); dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f); energy = epsSig6*(sig6-1.0f);
if (useSwitch) { if (useSwitch) {
...@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo ...@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r; dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue; energy *= switchValue;
} }
if (ljpme) {
fvec8 C6ij = C6s*C6params[atom];
fvec8 inverseR2 = inverseR*inverseR;
fvec8 mysig2 = sig*sig;
fvec8 mysig6 = mysig2*mysig2*mysig2;
fvec8 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec8 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
} }
else { else {
energy = 0.0f; energy = 0.0f;
...@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) { ...@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4); transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2; return coeff1*s1 + coeff2*s2;
} }
fvec8 CpuNonbondedForceVec8::exptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&exptermsTable[indexLower[0]]);
fvec4 t2(&exptermsTable[indexLower[1]]);
fvec4 t3(&exptermsTable[indexLower[2]]);
fvec4 t4(&exptermsTable[indexLower[3]]);
fvec4 t5(&exptermsTable[indexUpper[0]]);
fvec4 t6(&exptermsTable[indexUpper[1]]);
fvec4 t7(&exptermsTable[indexUpper[2]]);
fvec4 t8(&exptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::dExptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&dExptermsTable[indexLower[0]]);
fvec4 t2(&dExptermsTable[indexLower[1]]);
fvec4 t3(&dExptermsTable[indexLower[2]]);
fvec4 t4(&dExptermsTable[indexLower[3]]);
fvec4 t5(&dExptermsTable[indexUpper[0]]);
fvec4 t6(&dExptermsTable[indexUpper[1]]);
fvec4 t7(&dExptermsTable[indexUpper[2]]);
fvec4 t8(&dExptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
#endif #endif
...@@ -598,8 +598,10 @@ private: ...@@ -598,8 +598,10 @@ private:
class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel { class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform), CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL), cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), C6s(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) { directDispersionPmeGrid(NULL), reciprocalDispersionPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeAtomDispersionGridIndex(NULL),
pmeEnergyBuffer(NULL), dispersionPmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL), dispersionPmeio(NULL) {
} }
~CudaCalcNonbondedForceKernel(); ~CudaCalcNonbondedForceKernel();
/** /**
...@@ -636,6 +638,15 @@ public: ...@@ -636,6 +638,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class SortTrait : public CudaSort::SortTrait { class SortTrait : public CudaSort::SortTrait {
int getDataSize() const {return 8;} int getDataSize() const {return 8;}
...@@ -655,38 +666,55 @@ private: ...@@ -655,38 +666,55 @@ private:
CudaContext& cu; CudaContext& cu;
bool hasInitializedFFT; bool hasInitializedFFT;
CudaArray* sigmaEpsilon; CudaArray* sigmaEpsilon;
CudaArray* C6s;
CudaArray* exceptionParams; CudaArray* exceptionParams;
CudaArray* cosSinSums; CudaArray* cosSinSums;
CudaArray* directPmeGrid; CudaArray* directPmeGrid;
CudaArray* reciprocalPmeGrid; CudaArray* reciprocalPmeGrid;
CudaArray* directDispersionPmeGrid;
CudaArray* reciprocalDispersionPmeGrid;
CudaArray* pmeBsplineModuliX; CudaArray* pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY; CudaArray* pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ; CudaArray* pmeBsplineModuliZ;
CudaArray* pmeAtomRange; CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex; CudaArray* pmeAtomGridIndex;
CudaArray* pmeAtomDispersionGridIndex;
CudaArray* pmeEnergyBuffer; CudaArray* pmeEnergyBuffer;
CudaArray* dispersionPmeEnergyBuffer;
CudaSort* sort; CudaSort* sort;
Kernel cpuPme; Kernel cpuPme;
Kernel cpuDispersionPme;
PmeIO* pmeio; PmeIO* pmeio;
CUstream pmeStream; PmeIO* dispersionPmeio;
CUevent pmeSyncEvent; CUstream pmeStream, dispersionPmeStream;
CUevent pmeSyncEvent, dispersionPmeSyncEvent;
CudaFFT3D* fft; CudaFFT3D* fft;
cufftHandle fftForward; cufftHandle fftForward;
cufftHandle fftBackward; cufftHandle fftBackward;
CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward;
CUfunction ewaldSumsKernel; CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel; CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel; CUfunction pmeGridIndexKernel;
CUfunction pmeDispersionGridIndexKernel;
CUfunction pmeSpreadChargeKernel; CUfunction pmeSpreadChargeKernel;
CUfunction pmeDispersionSpreadChargeKernel;
CUfunction pmeFinishSpreadChargeKernel; CUfunction pmeFinishSpreadChargeKernel;
CUfunction pmeDispersionFinishSpreadChargeKernel;
CUfunction pmeEvalEnergyKernel; CUfunction pmeEvalEnergyKernel;
CUfunction pmeEvalDispersionEnergyKernel;
CUfunction pmeConvolutionKernel; CUfunction pmeConvolutionKernel;
CUfunction pmeDispersionConvolutionKernel;
CUfunction pmeInterpolateForceKernel; CUfunction pmeInterpolateForceKernel;
CUfunction pmeInterpolateDispersionForceKernel;
std::map<std::string, std::string> pmeDefines; std::map<std::string, std::string> pmeDefines;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha; double ewaldSelfEnergy, dispersionSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int interpolateForceThreads; int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT; int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
......
...@@ -439,6 +439,15 @@ public: ...@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class Task; class Task;
CudaPlatform::PlatformData& data; CudaPlatform::PlatformData& data;
......
This diff is collapsed.
...@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& ...@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz); dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
} }
void CudaParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getLJPMEParameters(alpha, nx, ny, nz);
}
class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask { class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask {
public: public:
Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce, Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
...@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys ...@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name"); CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
deviceName << name; deviceName << name;
} }
size_t printfsize;
cuCtxGetLimit(&printfsize, CU_LIMIT_PRINTF_FIFO_SIZE);
cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 10*printfsize);
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision()); useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
disablePmeStream = (pmeStreamProperty == "true"); disablePmeStream = (pmeStreamProperty == "true");
deterministicForces = (deterministicForcesProperty == "true"); deterministicForces = (deterministicForcesProperty == "true");
......
...@@ -17,6 +17,25 @@ ...@@ -17,6 +17,25 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr; const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif #endif
real tempForce = 0.0f; real tempForce = 0.0f;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = C6s1*C6s2;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) { if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
...@@ -29,6 +48,13 @@ ...@@ -29,6 +48,13 @@
includeInteraction = false; includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w; tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
} }
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
} }
else { else {
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
...@@ -36,7 +62,8 @@ ...@@ -36,7 +62,8 @@
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y); real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f); real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH #if USE_LJ_SWITCH
...@@ -48,6 +75,22 @@ ...@@ -48,6 +75,22 @@
ljEnergy *= switchValue; ljEnergy *= switchValue;
} }
#endif #endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0; tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
#else #else
......
extern "C" __global__ void findAtomDispersionGridIndex(const real4* __restrict__ posq, int2* __restrict__ pmeAtomGridIndex,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Compute the index of the grid point each atom is associated with.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real4 pos = posq[i];
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*DISPERSION_GRID_SIZE_X;
t.y = (t.y-floor(t.y))*DISPERSION_GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*DISPERSION_GRID_SIZE_Z;
int3 gridIndex = make_int3(((int) t.x) % DISPERSION_GRID_SIZE_X,
((int) t.y) % DISPERSION_GRID_SIZE_Y,
((int) t.z) % DISPERSION_GRID_SIZE_Z);
pmeAtomGridIndex[i] = make_int2(i, gridIndex.x*DISPERSION_GRID_SIZE_Y*DISPERSION_GRID_SIZE_Z+gridIndex.y*DISPERSION_GRID_SIZE_Z+gridIndex.z);
}
}
extern "C" __global__ void gridSpreadC6(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex,
const real* __restrict__ C6s) {
real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
// Process the atoms in spatially sorted order. This improves efficiency when writing
// the grid values.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom];
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*DISPERSION_GRID_SIZE_X;
t.y = (t.y-floor(t.y))*DISPERSION_GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*DISPERSION_GRID_SIZE_Z;
int3 gridIndex = make_int3(((int) t.x) % DISPERSION_GRID_SIZE_X,
((int) t.y) % DISPERSION_GRID_SIZE_Y,
((int) t.z) % DISPERSION_GRID_SIZE_Z);
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real3 dr = make_real3(t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z);
data[PME_ORDER-1] = make_real3(0);
data[1] = dr;
data[0] = make_real3(1)-dr;
for (int j = 3; j < PME_ORDER; j++) {
real div = RECIP(j-1);
data[j-1] = div*dr*data[j-2];
for (int k = 1; k < (j-1); k++)
data[j-k-1] = div*((dr+make_real3(k))*data[j-k-2] + (make_real3(j-k)-dr)*data[j-k-1]);
data[0] = div*(make_real3(1)-dr)*data[0];
}
data[PME_ORDER-1] = scale*dr*data[PME_ORDER-2];
for (int j = 1; j < (PME_ORDER-1); j++)
data[PME_ORDER-j-1] = scale*((dr+make_real3(j))*data[PME_ORDER-j-2] + (make_real3(PME_ORDER-j)-dr)*data[PME_ORDER-j-1]);
data[0] = scale*(make_real3(1)-dr)*data[0];
// Spread the charge from this atom onto each grid point.
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndex.x+ix;
xbase -= (xbase >= DISPERSION_GRID_SIZE_X ? DISPERSION_GRID_SIZE_X : 0);
xbase = xbase*DISPERSION_GRID_SIZE_Y*DISPERSION_GRID_SIZE_Z;
real dx = data[ix].x;
for (int iy = 0; iy < PME_ORDER; iy++) {
int ybase = gridIndex.y+iy;
ybase -= (ybase >= DISPERSION_GRID_SIZE_Y ? DISPERSION_GRID_SIZE_Y : 0);
ybase = xbase + ybase*DISPERSION_GRID_SIZE_Z;
real dy = data[iy].y;
for (int iz = 0; iz < PME_ORDER; iz++) {
int zindex = gridIndex.z+iz;
zindex -= (zindex >= DISPERSION_GRID_SIZE_Z ? DISPERSION_GRID_SIZE_Z : 0);
int index = ybase + zindex;
// We need to grab the C6 coefficient from the array
real add = C6s[atom]*dx*dy*data[iz].z;
#ifdef USE_DOUBLE_PRECISION
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000)));
#elif __CUDA_ARCH__ < 200 || defined(USE_DETERMINISTIC_FORCES)
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
int gridIndex = index;
gridIndex = (gridIndex%2 == 0 ? gridIndex/2 : (gridIndex+DISPERSION_GRID_SIZE_X*DISPERSION_GRID_SIZE_Y*DISPERSION_GRID_SIZE_Z)/2);
atomicAdd(&ulonglong_p[gridIndex], static_cast<unsigned long long>((long long) (add*0x100000000)));
#else
atomicAdd(&originalPmeGrid[index], add);
#endif
}
}
}
}
}
extern "C" __global__ void finishSpreadC6(long long* __restrict__ originalPmeGrid) {
real* floatGrid = (real*) originalPmeGrid;
const unsigned int gridSize = DISPERSION_GRID_SIZE_X*DISPERSION_GRID_SIZE_Y*DISPERSION_GRID_SIZE_Z;
real scale = 1.0f/(real) 0x100000000;
#ifdef USE_DOUBLE_PRECISION
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x)
floatGrid[index] = scale*originalPmeGrid[index];
#else
for (int index = 2*(blockIdx.x*blockDim.x+threadIdx.x); index < gridSize; index += 2*blockDim.x*gridDim.x) {
floatGrid[index] = scale*originalPmeGrid[index/2];
if (index+1 < gridSize)
floatGrid[index+1] = scale*originalPmeGrid[(index+gridSize+1)/2];
}
#endif
}
// convolutes the dispersion grid on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern "C" __global__ void
reciprocalDispersionConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ energyBuffer,
const real* __restrict__ pmeBsplineModuliX, const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
const real scaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
const real alpha = EWALD_DISPERSION_ALPHA;
real bfac = M_PI / alpha;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = alpha*alpha*alpha;
real fac3 = -2*alpha*M_PI*M_PI;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
// real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
int remainder = index-kx*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
int ky = remainder/(GRID_SIZE_Z/2+1);
int kz = remainder-ky*(GRID_SIZE_Z/2+1);
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
real mhx = mx*recipBoxVecX.x;
real mhy = mx*recipBoxVecY.x+my*recipBoxVecY.y;
real mhz = mx*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ.z;
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
real denom = scaleFactor/(bx*by*bz);
real2 grid = halfcomplex_pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
#if FAST_ERFC
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7. Stolen by ACS from the CUDA platform's AMOEBA plugin.
real t = 1.0f/(1.0f+0.3275911f*b);
real erfcterm = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expterm;
#else
real erfcterm = ERFC(b);
#endif
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm);
}
}
extern "C" __global__ void
gridEvaluateDispersionEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ energyBuffer,
const real* __restrict__ pmeBsplineModuliX, const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = DISPERSION_GRID_SIZE_X*DISPERSION_GRID_SIZE_Y*DISPERSION_GRID_SIZE_Z;
const real scaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
const real alpha = EWALD_DISPERSION_ALPHA;
real bfac = M_PI / alpha;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = alpha*alpha*alpha;
real fac3 = -2*alpha*M_PI*M_PI;
mixed energy = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
// real indices
int kx = index/(DISPERSION_GRID_SIZE_Y*(DISPERSION_GRID_SIZE_Z));
int remainder = index-kx*DISPERSION_GRID_SIZE_Y*(DISPERSION_GRID_SIZE_Z);
int ky = remainder/(DISPERSION_GRID_SIZE_Z);
int kz = remainder-ky*(DISPERSION_GRID_SIZE_Z);
int mx = (kx < (DISPERSION_GRID_SIZE_X+1)/2) ? kx : (kx-DISPERSION_GRID_SIZE_X);
int my = (ky < (DISPERSION_GRID_SIZE_Y+1)/2) ? ky : (ky-DISPERSION_GRID_SIZE_Y);
int mz = (kz < (DISPERSION_GRID_SIZE_Z+1)/2) ? kz : (kz-DISPERSION_GRID_SIZE_Z);
real mhx = mx*recipBoxVecX.x;
real mhy = mx*recipBoxVecY.x+my*recipBoxVecY.y;
real mhz = mx*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ.z;
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
real denom = scaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
#if FAST_ERFC
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7. Stolen by ACS from the CUDA platform's AMOEBA plugin.
real t = 1.0f/(1.0f+0.3275911f*b);
real erfcterm = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expterm;
#else
real erfcterm = ERFC(b);
#endif
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
if (kz >= (DISPERSION_GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : DISPERSION_GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : DISPERSION_GRID_SIZE_Y-ky);
kz = DISPERSION_GRID_SIZE_Z-kz;
}
int indexInHalfComplexGrid = kz + ky*(DISPERSION_GRID_SIZE_Z/2+1)+kx*(DISPERSION_GRID_SIZE_Y*(DISPERSION_GRID_SIZE_Z/2+1));
real2 grid = halfcomplex_pmeGrid[indexInHalfComplexGrid];
// N.B. We inlcude the 0,0,0 point for dispersion
energy += eterm*(grid.x*grid.x + grid.y*grid.y);
}
#ifdef USE_PME_STREAM
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = 0.5f*energy;
#else
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy;
#endif
}
extern "C" __global__
void gridInterpolateDispersionForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex, const real* __restrict__ C6s) {
real3 data[PME_ORDER];
real3 ddata[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
// Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
int atom = pmeAtomGridIndex[i].x;
real3 force = make_real3(0);
real4 pos = posq[atom];
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z);
t.x = (t.x-floor(t.x))*DISPERSION_GRID_SIZE_X;
t.y = (t.y-floor(t.y))*DISPERSION_GRID_SIZE_Y;
t.z = (t.z-floor(t.z))*DISPERSION_GRID_SIZE_Z;
int3 gridIndex = make_int3(((int) t.x) % DISPERSION_GRID_SIZE_X,
((int) t.y) % DISPERSION_GRID_SIZE_Y,
((int) t.z) % DISPERSION_GRID_SIZE_Z);
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real3 dr = make_real3(t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z);
data[PME_ORDER-1] = make_real3(0);
data[1] = dr;
data[0] = make_real3(1)-dr;
for (int j = 3; j < PME_ORDER; j++) {
real div = RECIP(j-1);
data[j-1] = div*dr*data[j-2];
for (int k = 1; k < (j-1); k++)
data[j-k-1] = div*((dr+make_real3(k))*data[j-k-2] + (make_real3(j-k)-dr)*data[j-k-1]);
data[0] = div*(make_real3(1)-dr)*data[0];
}
ddata[0] = -data[0];
for (int j = 1; j < PME_ORDER; j++)
ddata[j] = data[j-1]-data[j];
data[PME_ORDER-1] = scale*dr*data[PME_ORDER-2];
for (int j = 1; j < (PME_ORDER-1); j++)
data[PME_ORDER-j-1] = scale*((dr+make_real3(j))*data[PME_ORDER-j-2] + (make_real3(PME_ORDER-j)-dr)*data[PME_ORDER-j-1]);
data[0] = scale*(make_real3(1)-dr)*data[0];
// Compute the force on this atom.
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndex.x+ix;
xbase -= (xbase >= DISPERSION_GRID_SIZE_X ? DISPERSION_GRID_SIZE_X : 0);
xbase = xbase*DISPERSION_GRID_SIZE_Y*DISPERSION_GRID_SIZE_Z;
real dx = data[ix].x;
real ddx = ddata[ix].x;
for (int iy = 0; iy < PME_ORDER; iy++) {
int ybase = gridIndex.y+iy;
ybase -= (ybase >= DISPERSION_GRID_SIZE_Y ? DISPERSION_GRID_SIZE_Y : 0);
ybase = xbase + ybase*DISPERSION_GRID_SIZE_Z;
real dy = data[iy].y;
real ddy = ddata[iy].y;
for (int iz = 0; iz < PME_ORDER; iz++) {
int zindex = gridIndex.z+iz;
zindex -= (zindex >= DISPERSION_GRID_SIZE_Z ? DISPERSION_GRID_SIZE_Z : 0);
int index = ybase + zindex;
real gridvalue = originalPmeGrid[index];
force.x += ddx*dy*data[iz].z*gridvalue;
force.y += dx*ddy*data[iz].z*gridvalue;
force.z += dx*dy*ddata[iz].z*gridvalue;
}
}
}
real q = C6s[atom];
real forceX = -q*(force.x*DISPERSION_GRID_SIZE_X*recipBoxVecX.x);
real forceY = -q*(force.x*DISPERSION_GRID_SIZE_X*recipBoxVecY.x+force.y*DISPERSION_GRID_SIZE_Y*recipBoxVecY.y);
real forceZ = -q*(force.x*DISPERSION_GRID_SIZE_X*recipBoxVecZ.x+force.y*DISPERSION_GRID_SIZE_Y*recipBoxVecZ.y+force.z*DISPERSION_GRID_SIZE_Z*recipBoxVecZ.z);
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (forceX*0x100000000)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forceY*0x100000000)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (forceZ*0x100000000)));
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment