Commit 53f770f4 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #1557 from andysim/dpme

Dispersion PME
parents f36f36bf c0850062
......@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
}
void CudaParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getLJPMEParameters(alpha, nx, ny, nz);
}
class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
......@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
deviceName << name;
}
size_t printfsize;
cuCtxGetLimit(&printfsize, CU_LIMIT_PRINTF_FIFO_SIZE);
cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 10*printfsize);
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
disablePmeStream = (pmeStreamProperty == "true");
deterministicForces = (deterministicForcesProperty == "true");
......
......@@ -17,6 +17,26 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif
real tempForce = 0.0f;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
......@@ -29,6 +49,13 @@
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
}
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
}
else {
#if HAS_LENNARD_JONES
......@@ -36,7 +63,8 @@
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
......@@ -48,6 +76,22 @@
ljEnergy *= switchValue;
}
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
#else
......
......@@ -21,7 +21,11 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#endif
) {
real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
......@@ -63,6 +67,12 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndex.x+ix;
xbase -= (xbase >= GRID_SIZE_X ? GRID_SIZE_X : 0);
......@@ -80,7 +90,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = ybase + zindex;
real add = pos.w*dx*dy*data[iz].z;
real add = charge*dx*dy*data[iz].z;
#ifdef USE_DOUBLE_PRECISION
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000)));
......@@ -121,7 +131,15 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
#endif
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
// real indices
......@@ -140,12 +158,23 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real bz = pmeBsplineModuliZ[kz];
real2 grid = halfcomplex_pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = ERFC(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm);
}
#endif
}
}
......@@ -156,7 +185,15 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
#endif
mixed energy = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
......@@ -175,8 +212,19 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = ERFC(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
......@@ -185,11 +233,12 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = halfcomplex_pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 || kz != 0) {
#ifndef USE_LJPME
if (kx != 0 || ky != 0 || kz != 0)
#endif
energy += eterm*(grid.x*grid.x + grid.y*grid.y);
}
}
#ifdef USE_PME_STREAM
#if defined(USE_PME_STREAM) && !defined(USE_LJPME)
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = 0.5f*energy;
#else
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy;
......@@ -199,7 +248,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern "C" __global__
void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#endif
) {
real3 data[PME_ORDER];
real3 ddata[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
......@@ -271,7 +324,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
}
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
real q = pos.w*EPSILON_FACTOR;
#endif
real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
......@@ -591,8 +591,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL),
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL),
pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) {
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL),
pmeDispersionBsplineModuliY(NULL), pmeDispersionBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeAtomRange(NULL),
pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), dispersionFft(NULL), pmeio(NULL) {
}
~OpenCLCalcNonbondedForceKernel();
/**
......@@ -629,6 +630,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class SortTrait : public OpenCLSort::SortTrait {
int getDataSize() const {return 8;}
......@@ -657,6 +667,9 @@ private:
OpenCLArray* pmeBsplineModuliX;
OpenCLArray* pmeBsplineModuliY;
OpenCLArray* pmeBsplineModuliZ;
OpenCLArray* pmeDispersionBsplineModuliX;
OpenCLArray* pmeDispersionBsplineModuliY;
OpenCLArray* pmeDispersionBsplineModuliZ;
OpenCLArray* pmeBsplineTheta;
OpenCLArray* pmeAtomRange;
OpenCLArray* pmeAtomGridIndex;
......@@ -665,25 +678,34 @@ private:
cl::CommandQueue pmeQueue;
cl::Event pmeSyncEvent;
OpenCLFFT3D* fft;
OpenCLFFT3D* dispersionFft;
Kernel cpuPme;
PmeIO* pmeio;
SyncQueuePostComputation* syncQueue;
cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel;
cl::Kernel pmeGridIndexKernel;
cl::Kernel pmeAtomRangeKernel;
cl::Kernel pmeDispersionAtomRangeKernel;
cl::Kernel pmeZIndexKernel;
cl::Kernel pmeDispersionZIndexKernel;
cl::Kernel pmeUpdateBsplinesKernel;
cl::Kernel pmeDispersionUpdateBsplinesKernel;
cl::Kernel pmeSpreadChargeKernel;
cl::Kernel pmeDispersionSpreadChargeKernel;
cl::Kernel pmeFinishSpreadChargeKernel;
cl::Kernel pmeDispersionFinishSpreadChargeKernel;
cl::Kernel pmeConvolutionKernel;
cl::Kernel pmeDispersionConvolutionKernel;
cl::Kernel pmeEvalEnergyKernel;
cl::Kernel pmeDispersionEvalEnergyKernel;
cl::Kernel pmeInterpolateForceKernel;
cl::Kernel pmeDispersionInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines;
std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha;
double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue, doLJPME;
NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5;
};
......
......@@ -438,6 +438,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class Task;
OpenCLPlatform::PlatformData& data;
......
This diff is collapsed.
......@@ -583,6 +583,10 @@ void OpenCLParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int
dynamic_cast<const OpenCLCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
}
void OpenCLParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const OpenCLCalcNonbondedForceKernel&>(kernels[0].getImpl()).getLJPMEParameters(alpha, nx, ny, nz);
}
class OpenCLParallelCalcCustomNonbondedForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
......@@ -22,6 +22,26 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
......@@ -34,6 +54,13 @@
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
}
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
}
else {
#if HAS_LENNARD_JONES
......@@ -41,7 +68,8 @@
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
......@@ -53,6 +81,22 @@
ljEnergy *= switchValue;
}
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
#else
......
__kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache,
__global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const real4 scale = 1/(real) (PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
......@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
data[PME_ORDER-j-1] = scale*((dr+(real4) j)*data[PME_ORDER-j-2] + (-dr+(real4) (PME_ORDER-j))*data[PME_ORDER-j-1]);
data[0] = scale*(-dr+1.0f)*data[0];
for (int j = 0; j < PME_ORDER; j++) {
data[j].w = pos.w; // Storing the charge here improves cache coherency in the charge spreading kernel
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
data[j].w = charge; // Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta[i+j*NUM_ATOMS] = data[j];
}
#endif
......@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER];
......@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
for (int ix = 0; ix < PME_ORDER; ix++) {
int xindex = gridIndex.x+ix;
xindex -= (xindex >= GRID_SIZE_X ? GRID_SIZE_X : 0);
......@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
real add = pos.w*data[ix].x*data[iy].y*data[iz].z;
real add = charge*data[ix].x*data[iy].y*data[iz].z;
#ifdef USE_ALTERNATE_MEMORY_ACCESS_PATTERN
// On Nvidia devices (at least Maxwell anyway), this split ordering produces much higher performance. Why?
// I have no idea! And of course on AMD it produces slower performance. GPUs are not meant to be understood.
......@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
#elif defined(DEVICE_IS_CPU)
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
if (firstx == lastx)
......@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
bool hasComputedThetas = false;
for (int ix = 0; ix < PME_ORDER; ix++) {
int xindex = gridIndex.x+ix;
......@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
pmeGrid[index] += EPSILON_FACTOR*pos.w*data[ix].x*data[iy].y*data[iz].z;
pmeGrid[index] += EPSILON_FACTOR*charge*data[ix].x*data[iy].y*data[iz].z;
}
}
}
......@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
}
#else
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta) {
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
// Compute the charge on a grid point.
......@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
......@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
real bz = pmeBsplineModuliZ[kz];
real2 grid = pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
}
#endif
}
}
......@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
......@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
......@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 || kz != 0) {
#ifndef USE_LJPME
if (kx != 0 || ky != 0 || kz != 0)
#endif
energy += eterm*(grid.x*grid.x + grid.y*grid.y);
}
}
#ifdef USE_PME_STREAM
#if defined(USE_PME_STREAM) && !defined(USE_LJPME)
energyBuffer[get_global_id(0)] = 0.5f*energy;
#else
energyBuffer[get_global_id(0)] += 0.5f*energy;
......@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
__kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX,
real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) {
real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER];
real4 ddata[PME_ORDER];
......@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
}
}
real4 totalForce = forceBuffers[atom];
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
real q = pos.w*EPSILON_FACTOR;
#endif
totalForce.x -= q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
totalForce.y -= q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
totalForce.z -= q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
......@@ -604,12 +604,21 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
int numParticles, num14;
int **bonded14IndexArray;
RealOpenMM **particleParamArray, **bonded14ParamArray;
RealOpenMM nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, dispersionCoefficient;
int kmax[3], gridSize[3];
RealOpenMM nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, dispersionCoefficient;
int kmax[3], gridSize[3], dispersionGridSize[3];
bool useSwitchingFunction;
std::vector<std::set<int> > exclusions;
NonbondedMethod nonbondedMethod;
......
......@@ -38,14 +38,14 @@ class ReferenceLJCoulombIxn {
bool useSwitch;
bool periodic;
bool ewald;
bool pme;
bool pme, ljpme;
const OpenMM::NeighborList* neighborList;
OpenMM::RealVec periodicBoxVectors[3];
RealOpenMM cutoffDistance, switchingDistance;
RealOpenMM krf, crf;
RealOpenMM alphaEwald;
RealOpenMM alphaEwald, alphaDispersionEwald;
int numRx, numRy, numRz;
int meshDim[3];
int meshDim[3], dispersionMeshDim[3];
// parameter indices
......@@ -149,6 +149,18 @@ class ReferenceLJCoulombIxn {
void setUsePME(RealOpenMM alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param dalpha the dispersion Ewald separation parameter
@param dgridSize the dimensions of the dispersion mesh
--------------------------------------------------------------------------------------- */
void setUseLJPME(RealOpenMM dalpha, int dmeshSize[3]);
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn
......
......@@ -87,6 +87,28 @@ pme_exec(pme_t pme,
RealOpenMM * energy);
/*
* Evaluate reciprocal space PME dispersion energy and forces.
*
* Args:
*
* pme Opaque pme_t object, must have been initialized with pme_init()
* x Pointer to coordinate data array (nm)
* f Pointer to force data array (will be written as kJ/mol/nm)
* c6s Array of c6 coefficients (units of sqrt(kJ/mol).nm^3 )
* box Simulation cell dimensions (nm)
* energy Total energy (will be written in units of kJ/mol)
*/
int OPENMM_EXPORT
pme_exec_dpme(pme_t pme,
const std::vector<OpenMM::RealVec>& atomCoordinates,
std::vector<OpenMM::RealVec>& forces,
const std::vector<RealOpenMM>& c6s,
const OpenMM::RealVec periodicBoxVectors[3],
RealOpenMM * energy);
/* Release all memory in pme structure */
int OPENMM_EXPORT
......
......@@ -969,9 +969,17 @@ void ReferenceCalcNonbondedForceKernel::initialize(const System& system, const N
}
else if (nonbondedMethod == PME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = (RealOpenMM) alpha;
}
else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = (RealOpenMM) alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = (RealOpenMM) alpha;
useSwitchingFunction = false;
}
rfDielectric = (RealOpenMM)force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
......@@ -987,11 +995,12 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc
bool periodic = (nonbondedMethod == CutoffPeriodic);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff) {
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, extractBoxVectors(context), periodic || ewald || pme, nonbondedCutoff, 0.0);
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, extractBoxVectors(context), periodic || ewald || pme || ljpme, nonbondedCutoff, 0.0);
clj.setUseCutoff(nonbondedCutoff, *neighborList, rfDielectric);
}
if (periodic || ewald || pme) {
if (periodic || ewald || pme || ljpme) {
RealVec* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
......@@ -1002,6 +1011,10 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc
clj.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
if (pme)
clj.setUsePME(ewaldAlpha, gridSize);
if (ljpme){
clj.setUsePME(ewaldAlpha, gridSize);
clj.setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
if (useSwitchingFunction)
clj.setUseSwitchingFunction(switchingDistance);
clj.calculatePairIxn(numParticles, posData, particleParamArray, exclusions, 0, forceData, 0, includeEnergy ? &energy : NULL, includeDirect, includeReciprocal);
......@@ -1059,14 +1072,23 @@ void ReferenceCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& con
}
void ReferenceCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME or LJPME");
alpha = ewaldAlpha;
nx = gridSize[0];
ny = gridSize[1];
nz = gridSize[2];
}
void ReferenceCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using LJPME");
alpha = ewaldDispersionAlpha;
nx = dispersionGridSize[0];
ny = dispersionGridSize[1];
nz = dispersionGridSize[2];
}
ReferenceCalcCustomNonbondedForceKernel::~ReferenceCalcCustomNonbondedForceKernel() {
disposeRealArray(particleParamArray, numParticles);
if (neighborList != NULL)
......
......@@ -26,6 +26,7 @@
#include <sstream>
#include <complex>
#include <algorithm>
#include <iostream>
#include "SimTKOpenMMUtilities.h"
#include "ReferenceLJCoulombIxn.h"
......@@ -47,7 +48,7 @@ using namespace OpenMM;
--------------------------------------------------------------------------------------- */
ReferenceLJCoulombIxn::ReferenceLJCoulombIxn() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false) {
ReferenceLJCoulombIxn::ReferenceLJCoulombIxn() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false) {
// ---------------------------------------------------------------------------------------
......@@ -73,7 +74,7 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() {
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
......@@ -83,14 +84,14 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() {
--------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUseCutoff(RealOpenMM distance, const OpenMM::NeighborList& neighbors, RealOpenMM solventDielectric) {
void ReferenceLJCoulombIxn::setUseCutoff(RealOpenMM distance, const OpenMM::NeighborList& neighbors, RealOpenMM solventDielectric) {
cutoff = true;
cutoffDistance = distance;
neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
}
}
/**---------------------------------------------------------------------------------------
......@@ -105,7 +106,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
switchingDistance = distance;
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
......@@ -115,7 +116,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setPeriodic(OpenMM::RealVec* vectors) {
void ReferenceLJCoulombIxn::setPeriodic(OpenMM::RealVec* vectors) {
assert(cutoff);
assert(vectors[0][0] >= 2.0*cutoffDistance);
......@@ -125,9 +126,9 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
periodicBoxVectors[0] = vectors[0];
periodicBoxVectors[1] = vectors[1];
periodicBoxVectors[2] = vectors[2];
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
......@@ -138,15 +139,15 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUseEwald(RealOpenMM alpha, int kmaxx, int kmaxy, int kmaxz) {
void ReferenceLJCoulombIxn::setUseEwald(RealOpenMM alpha, int kmaxx, int kmaxy, int kmaxz) {
alphaEwald = alpha;
numRx = kmaxx;
numRy = kmaxy;
numRz = kmaxz;
ewald = true;
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
......@@ -155,13 +156,30 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUsePME(RealOpenMM alpha, int meshSize[3]) {
void ReferenceLJCoulombIxn::setUsePME(RealOpenMM alpha, int meshSize[3]) {
alphaEwald = alpha;
meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2];
pme = true;
}
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion terms.
@param alpha the dispersion Ewald separation parameter
@param gridSize the dimensions of the dispersion mesh
--------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUseLJPME(RealOpenMM alpha, int meshSize[3]) {
alphaDispersionEwald = alpha;
dispersionMeshDim[0] = meshSize[0];
dispersionMeshDim[1] = meshSize[1];
dispersionMeshDim[2] = meshSize[2];
ljpme = true;
}
/**---------------------------------------------------------------------------------------
......@@ -201,16 +219,27 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
RealOpenMM totalSelfEwaldEnergy = 0.0;
RealOpenMM realSpaceEwaldEnergy = 0.0;
RealOpenMM recipEnergy = 0.0;
RealOpenMM recipDispersionEnergy = 0.0;
RealOpenMM totalRecipEnergy = 0.0;
RealOpenMM vdwEnergy = 0.0;
// **************************************************************************************
// SELF ENERGY
// **************************************************************************************
// A couple of sanity checks for
if(ljpme && useSwitch)
throw OpenMMException("Switching cannot be used with LJPME");
if(ljpme && !pme)
throw OpenMMException("LJPME has been set, without PME being set");
// **************************************************************************************
// SELF ENERGY
// **************************************************************************************
if (includeReciprocal) {
for (int atomID = 0; atomID < numberOfAtoms; atomID++) {
RealOpenMM selfEwaldEnergy = (RealOpenMM) (ONE_4PI_EPS0*atomParameters[atomID][QIndex]*atomParameters[atomID][QIndex] * alphaEwald/SQRT_PI);
if(ljpme) {
// Dispersion self term
selfEwaldEnergy -= pow(alphaDispersionEwald, 6.0) * 64.0*pow(atomParameters[atomID][SigIndex], 6.0) * pow(atomParameters[atomID][EpsIndex], 2.0) / 12.0;
}
totalSelfEwaldEnergy -= selfEwaldEnergy;
if (energyByAtom) {
energyByAtom[atomID] -= selfEwaldEnergy;
......@@ -222,9 +251,9 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
*totalEnergy += totalSelfEwaldEnergy;
}
// **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
// **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES
// **************************************************************************************
// PME
if (pme && includeReciprocal) {
......@@ -245,8 +274,31 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
energyByAtom[n] += recipEnergy;
pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<RealVec> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = 8.0*pow(atomParameters[i][SigIndex], 3.0) * atomParameters[i][EpsIndex];
dpmeforces.push_back(RealVec());
}
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
if (energyByAtom)
for (int n = 0; n < numberOfAtoms; n++)
energyByAtom[n] += recipDispersionEnergy;
pme_destroy(pmedata);
}
}
// Ewald method
else if (ewald && includeReciprocal) {
......@@ -258,7 +310,7 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms);
......@@ -350,15 +402,16 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
}
}
// **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES
// **************************************************************************************
// **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES
// **************************************************************************************
if (!includeDirect)
return;
RealOpenMM totalVdwEnergy = 0.0f;
RealOpenMM totalRealSpaceEwaldEnergy = 0.0f;
for (int i = 0; i < (int) neighborList->size(); i++) {
OpenMM::AtomPair pair = (*neighborList)[i];
int ii = pair.first;
......@@ -387,6 +440,37 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
RealOpenMM eps = atomParameters[ii][EpsIndex]*atomParameters[jj][EpsIndex];
dEdR += switchValue*eps*(twelve*sig6 - six)*sig6*inverseR*inverseR;
vdwEnergy = eps*(sig6-one)*sig6;
if (ljpme) {
RealOpenMM dalphaR = alphaDispersionEwald * r;
RealOpenMM dar2 = dalphaR*dalphaR;
RealOpenMM dar4 = dar2*dar2;
RealOpenMM dar6 = dar4*dar2;
RealOpenMM inverseR2 = inverseR*inverseR;
RealOpenMM c6i = 8.0*pow(atomParameters[ii][SigIndex], 3.0) * atomParameters[ii][EpsIndex];
RealOpenMM c6j = 8.0*pow(atomParameters[jj][SigIndex], 3.0) * atomParameters[jj][EpsIndex];
// For the energies and forces, we first add the regular Lorentz−Berthelot terms. The C12 term is treated as usual
// but we then subtract out (remembering that the C6 term is negative) the multiplicative C6 term that has been
// computed in real space. Finally, we add a potential shift term to account for the difference between the LB
// and multiplicative functional forms at the cutoff.
RealOpenMM emult = c6i*c6j*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4));
dEdR += 6.0*c6i*c6j*inverseR2*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
RealOpenMM inverseCut2 = 1.0/(cutoffDistance*cutoffDistance);
RealOpenMM inverseCut6 = inverseCut2*inverseCut2*inverseCut2;
sig2 = atomParameters[ii][SigIndex] + atomParameters[jj][SigIndex];
sig2 *= sig2;
sig6 = sig2*sig2*sig2;
// The additive part of the potential shift
RealOpenMM potentialshift = eps*(one-sig6*inverseCut6)*sig6*inverseCut6;
dalphaR = alphaDispersionEwald * cutoffDistance;
dar2 = dalphaR*dalphaR;
dar4 = dar2*dar2;
// The multiplicative part of the potential shift
potentialshift -= c6i*c6j*inverseCut6*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4));
vdwEnergy += emult + potentialshift;
}
if (useSwitch) {
dEdR -= vdwEnergy*switchDeriv*inverseR;
vdwEnergy *= switchValue;
......@@ -452,6 +536,24 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
realSpaceEwaldEnergy = (RealOpenMM) (alphaEwald*TWO_OVER_SQRT_PI*ONE_4PI_EPS0*atomParameters[ii][QIndex]*atomParameters[jj][QIndex]);
}
if(ljpme){
// Dispersion terms. Here we just back out the reciprocal space terms, and don't add any extra real space terms.
RealOpenMM dalphaR = alphaDispersionEwald * r;
RealOpenMM inverseR2 = inverseR*inverseR;
RealOpenMM dar2 = dalphaR*dalphaR;
RealOpenMM dar4 = dar2*dar2;
RealOpenMM dar6 = dar4*dar2;
RealOpenMM c6i = 8.0*pow(atomParameters[ii][SigIndex], 3.0) * atomParameters[ii][EpsIndex];
RealOpenMM c6j = 8.0*pow(atomParameters[jj][SigIndex], 3.0) * atomParameters[jj][EpsIndex];
realSpaceEwaldEnergy -= c6i*c6j*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4));
RealOpenMM dEdR = -6.0*c6i*c6j*inverseR2*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
for (int kk = 0; kk < 3; kk++) {
RealOpenMM force = dEdR*deltaR[0][kk];
forces[ii][kk] -= force;
forces[jj][kk] += force;
}
}
totalExclusionEnergy += realSpaceEwaldEnergy;
if (energyByAtom) {
energyByAtom[ii] -= realSpaceEwaldEnergy;
......@@ -488,7 +590,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>&
RealOpenMM* fixedParameters, vector<RealVec>& forces,
RealOpenMM* energyByAtom, RealOpenMM* totalEnergy, bool includeDirect, bool includeReciprocal) const {
if (ewald || pme) {
if (ewald || pme || ljpme) {
calculateEwaldIxn(numberOfAtoms, atomCoordinates, atomParameters, exclusions, fixedParameters, forces, energyByAtom,
totalEnergy, includeDirect, includeReciprocal);
return;
......@@ -512,7 +614,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>&
}
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms
......@@ -608,5 +710,5 @@ void ReferenceLJCoulombIxn::calculateOneIxn(int ii, int jj, vector<RealVec>& ato
energyByAtom[ii] += energy;
energyByAtom[jj] += energy;
}
}
}
......@@ -513,6 +513,106 @@ pme_reciprocal_convolution(pme_t pme,
}
static void
dpme_reciprocal_convolution(pme_t pme,
const RealVec periodicBoxVectors[3],
const RealVec recipBoxVectors[3],
RealOpenMM * energy)
{
int kx,ky,kz;
int nx,ny,nz;
RealOpenMM mx,my,mz;
RealOpenMM mhx,mhy,mhz,m2;
RealOpenMM bx,by,bz;
RealOpenMM d1,d2;
RealOpenMM eterm,struct2,ets2;
RealOpenMM esum;
RealOpenMM denom;
RealOpenMM boxfactor;
RealOpenMM maxkx,maxky,maxkz;
t_complex *ptr;
nx = pme->ngrid[0];
ny = pme->ngrid[1];
nz = pme->ngrid[2];
boxfactor = (RealOpenMM) M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);
esum = 0;
maxkx = (RealOpenMM) ((nx+1)/2);
maxky = (RealOpenMM) ((ny+1)/2);
maxkz = (RealOpenMM) ((nz+1)/2);
RealOpenMM bfac = M_PI / pme->ewaldcoeff;
RealOpenMM fac1 = 2.0*M_PI*M_PI*M_PI*sqrt(M_PI);
RealOpenMM fac2 = pme->ewaldcoeff*pme->ewaldcoeff*pme->ewaldcoeff;
RealOpenMM fac3 = -2.0*pme->ewaldcoeff*M_PI*M_PI;
RealOpenMM b, m, m3, expfac, expterm, erfcterm;
for (kx=0;kx<nx;kx++)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mx = (RealOpenMM) ((kx<maxkx) ? kx : (kx-nx));
mhx = mx*recipBoxVectors[0][0];
bx = pme->bsplines_moduli[0][kx];
for (ky=0;ky<ny;ky++)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
my = (RealOpenMM) ((ky<maxky) ? ky : (ky-ny));
mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
by = pme->bsplines_moduli[1][ky];
for (kz=0;kz<nz;kz++)
{
/*
* Unlike the Coulombic case, there's an m=0 term so all terms are considered here.
*/
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mz = (RealOpenMM) ((kz<maxkz) ? kz : (kz-nz));
mhz = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];
/* Pointer to the grid cell in question */
ptr = pme->grid + kx*ny*nz + ky*nz + kz;
/* Get grid data for this frequency */
d1 = ptr->re;
d2 = ptr->im;
/* Calculate the convolution - see the Essman/Darden paper for the equation! */
m2 = mhx*mhx+mhy*mhy+mhz*mhz;
bz = pme->bsplines_moduli[2][kz];
denom = boxfactor / (bx*by*bz);
m = sqrt(m2);
m3 = m*m2;
b = bfac*m;
expfac = -b*b;
erfcterm = erfc(b);
expterm = exp(expfac);
eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
/* write back convolution data to grid */
ptr->re = d1*eterm;
ptr->im = d2*eterm;
struct2 = (d1*d1+d2*d2);
/* Long-range PME contribution to the energy for this frequency */
ets2 = eterm*struct2;
esum += ets2;
}
}
}
// Remember the C6 energy is attractive, hence the negative sign.
*energy = (RealOpenMM) (-esum);
}
static void
pme_grid_interpolate_force(pme_t pme,
const RealVec recipBoxVectors[3],
......@@ -704,6 +804,49 @@ int pme_exec(pme_t pme,
}
int pme_exec_dpme(pme_t pme,
const vector<RealVec>& atomCoordinates,
vector<RealVec>& forces,
const vector<RealOpenMM>& c6s,
const RealVec periodicBoxVectors[3],
RealOpenMM* energy)
{
/* Routine is called with coordinates in x, a box, and charges in q */
RealVec recipBoxVectors[3];
invert_box_vectors(periodicBoxVectors, recipBoxVectors);
/* Before we can do the actual interpolation, we need to recalculate and update
* the indices for each particle in the charge grid (initialized in pme_init()),
* and what its fractional offset in this grid cell is.
*/
/* Update charge grid indices and fractional offsets for each atom.
* The indices/fractions are stored internally in the pme datatype
*/
pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);
/* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
pme_update_bsplines(pme);
/* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
pme_grid_spread_charge(pme, c6s);
/* do 3d-fft */
fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
/* solve in k-space */
dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
/* do 3d-invfft */
fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
/* Get the particle forces from the grid and bsplines in the pme structure */
pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces);
return 0;
}
int
pme_destroy(pme_t pme)
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "ReferenceTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
......@@ -1155,7 +1155,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
nb.setCutoffDistance(force.getCutoffDistance());
NonbondedForceImpl::calcPMEParameters(system, nb, alpha, gridSizeX, gridSizeY, gridSizeZ);
NonbondedForceImpl::calcPMEParameters(system, nb, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment