"platforms/cuda2/tests/TestCudaEwald.cpp" did not exist on "6b0b0a26e9c41604cda816a7e94fe7cef8ef95a6"
Commit 53f770f4 authored by peastman's avatar peastman Committed by GitHub
Browse files

Merge pull request #1557 from andysim/dpme

Dispersion PME
parents f36f36bf c0850062
...@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& ...@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz); dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
} }
void CudaParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getLJPMEParameters(alpha, nx, ny, nz);
}
class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask { class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask {
public: public:
Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce, Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
...@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys ...@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name"); CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
deviceName << name; deviceName << name;
} }
size_t printfsize;
cuCtxGetLimit(&printfsize, CU_LIMIT_PRINTF_FIFO_SIZE);
cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 10*printfsize);
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision()); useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
disablePmeStream = (pmeStreamProperty == "true"); disablePmeStream = (pmeStreamProperty == "true");
deterministicForces = (deterministicForcesProperty == "true"); deterministicForces = (deterministicForcesProperty == "true");
......
...@@ -17,6 +17,26 @@ ...@@ -17,6 +17,26 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr; const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif #endif
real tempForce = 0.0f; real tempForce = 0.0f;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) { if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
...@@ -29,6 +49,13 @@ ...@@ -29,6 +49,13 @@
includeInteraction = false; includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w; tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
} }
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
} }
else { else {
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
...@@ -36,7 +63,8 @@ ...@@ -36,7 +63,8 @@
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y); real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f); real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH #if USE_LJ_SWITCH
...@@ -48,6 +76,22 @@ ...@@ -48,6 +76,22 @@
ljEnergy *= switchValue; ljEnergy *= switchValue;
} }
#endif #endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0; tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
#else #else
......
...@@ -21,7 +21,11 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int ...@@ -21,7 +21,11 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid, extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) { real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#endif
) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1); const real scale = RECIP(PME_ORDER-1);
...@@ -63,6 +67,12 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real ...@@ -63,6 +67,12 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
// Spread the charge from this atom onto each grid point. // Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
for (int ix = 0; ix < PME_ORDER; ix++) { for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndex.x+ix; int xbase = gridIndex.x+ix;
xbase -= (xbase >= GRID_SIZE_X ? GRID_SIZE_X : 0); xbase -= (xbase >= GRID_SIZE_X ? GRID_SIZE_X : 0);
...@@ -80,7 +90,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real ...@@ -80,7 +90,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0); zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = ybase + zindex; int index = ybase + zindex;
real add = pos.w*dx*dy*data[iz].z; real add = charge*dx*dy*data[iz].z;
#ifdef USE_DOUBLE_PRECISION #ifdef USE_DOUBLE_PRECISION
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid; unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000))); atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000)));
...@@ -121,7 +131,15 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict ...@@ -121,7 +131,15 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half // R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1); const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z); const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
#endif
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
// real indices // real indices
...@@ -140,12 +158,23 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict ...@@ -140,12 +158,23 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real bz = pmeBsplineModuliZ[kz]; real bz = pmeBsplineModuliZ[kz];
real2 grid = halfcomplex_pmeGrid[index]; real2 grid = halfcomplex_pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz; real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = ERFC(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz; real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom; real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) { if (kx != 0 || ky != 0 || kz != 0) {
halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm); halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm);
} }
#endif
} }
} }
...@@ -156,7 +185,15 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ ...@@ -156,7 +185,15 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) { real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half // R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z); const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
#endif
mixed energy = 0; mixed energy = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) { for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
...@@ -175,8 +212,19 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ ...@@ -175,8 +212,19 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real bx = pmeBsplineModuliX[kx]; real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky]; real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz]; real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = ERFC(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz; real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom; real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) { if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx); kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
...@@ -185,11 +233,12 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ ...@@ -185,11 +233,12 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
} }
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1)); int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = halfcomplex_pmeGrid[indexInHalfComplexGrid]; real2 grid = halfcomplex_pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 || kz != 0) { #ifndef USE_LJPME
if (kx != 0 || ky != 0 || kz != 0)
#endif
energy += eterm*(grid.x*grid.x + grid.y*grid.y); energy += eterm*(grid.x*grid.x + grid.y*grid.y);
} }
} #if defined(USE_PME_STREAM) && !defined(USE_LJPME)
#ifdef USE_PME_STREAM
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = 0.5f*energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = 0.5f*energy;
#else #else
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy;
...@@ -199,7 +248,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ ...@@ -199,7 +248,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern "C" __global__ extern "C" __global__
void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid, void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) { real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#endif
) {
real3 data[PME_ORDER]; real3 data[PME_ORDER];
real3 ddata[PME_ORDER]; real3 ddata[PME_ORDER];
const real scale = RECIP(PME_ORDER-1); const real scale = RECIP(PME_ORDER-1);
...@@ -271,7 +324,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __ ...@@ -271,7 +324,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
} }
} }
} }
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
real q = pos.w*EPSILON_FACTOR; real q = pos.w*EPSILON_FACTOR;
#endif
real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x); real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y); real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z); real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
...@@ -591,8 +591,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel { ...@@ -591,8 +591,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform), OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL), hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL),
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL),
pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) { pmeDispersionBsplineModuliY(NULL), pmeDispersionBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeAtomRange(NULL),
pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), dispersionFft(NULL), pmeio(NULL) {
} }
~OpenCLCalcNonbondedForceKernel(); ~OpenCLCalcNonbondedForceKernel();
/** /**
...@@ -629,6 +630,15 @@ public: ...@@ -629,6 +630,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class SortTrait : public OpenCLSort::SortTrait { class SortTrait : public OpenCLSort::SortTrait {
int getDataSize() const {return 8;} int getDataSize() const {return 8;}
...@@ -657,6 +667,9 @@ private: ...@@ -657,6 +667,9 @@ private:
OpenCLArray* pmeBsplineModuliX; OpenCLArray* pmeBsplineModuliX;
OpenCLArray* pmeBsplineModuliY; OpenCLArray* pmeBsplineModuliY;
OpenCLArray* pmeBsplineModuliZ; OpenCLArray* pmeBsplineModuliZ;
OpenCLArray* pmeDispersionBsplineModuliX;
OpenCLArray* pmeDispersionBsplineModuliY;
OpenCLArray* pmeDispersionBsplineModuliZ;
OpenCLArray* pmeBsplineTheta; OpenCLArray* pmeBsplineTheta;
OpenCLArray* pmeAtomRange; OpenCLArray* pmeAtomRange;
OpenCLArray* pmeAtomGridIndex; OpenCLArray* pmeAtomGridIndex;
...@@ -665,25 +678,34 @@ private: ...@@ -665,25 +678,34 @@ private:
cl::CommandQueue pmeQueue; cl::CommandQueue pmeQueue;
cl::Event pmeSyncEvent; cl::Event pmeSyncEvent;
OpenCLFFT3D* fft; OpenCLFFT3D* fft;
OpenCLFFT3D* dispersionFft;
Kernel cpuPme; Kernel cpuPme;
PmeIO* pmeio; PmeIO* pmeio;
SyncQueuePostComputation* syncQueue; SyncQueuePostComputation* syncQueue;
cl::Kernel ewaldSumsKernel; cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel; cl::Kernel ewaldForcesKernel;
cl::Kernel pmeGridIndexKernel;
cl::Kernel pmeAtomRangeKernel; cl::Kernel pmeAtomRangeKernel;
cl::Kernel pmeDispersionAtomRangeKernel;
cl::Kernel pmeZIndexKernel; cl::Kernel pmeZIndexKernel;
cl::Kernel pmeDispersionZIndexKernel;
cl::Kernel pmeUpdateBsplinesKernel; cl::Kernel pmeUpdateBsplinesKernel;
cl::Kernel pmeDispersionUpdateBsplinesKernel;
cl::Kernel pmeSpreadChargeKernel; cl::Kernel pmeSpreadChargeKernel;
cl::Kernel pmeDispersionSpreadChargeKernel;
cl::Kernel pmeFinishSpreadChargeKernel; cl::Kernel pmeFinishSpreadChargeKernel;
cl::Kernel pmeDispersionFinishSpreadChargeKernel;
cl::Kernel pmeConvolutionKernel; cl::Kernel pmeConvolutionKernel;
cl::Kernel pmeDispersionConvolutionKernel;
cl::Kernel pmeEvalEnergyKernel; cl::Kernel pmeEvalEnergyKernel;
cl::Kernel pmeDispersionEvalEnergyKernel;
cl::Kernel pmeInterpolateForceKernel; cl::Kernel pmeInterpolateForceKernel;
cl::Kernel pmeDispersionInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines; std::map<std::string, std::string> pmeDefines;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha; double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue; int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue, doLJPME;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
......
...@@ -438,6 +438,15 @@ public: ...@@ -438,6 +438,15 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class Task; class Task;
OpenCLPlatform::PlatformData& data; OpenCLPlatform::PlatformData& data;
......
This diff is collapsed.
...@@ -583,6 +583,10 @@ void OpenCLParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int ...@@ -583,6 +583,10 @@ void OpenCLParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int
dynamic_cast<const OpenCLCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz); dynamic_cast<const OpenCLCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
} }
void OpenCLParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const OpenCLCalcNonbondedForceKernel&>(kernels[0].getImpl()).getLJPMEParameters(alpha, nx, ny, nz);
}
class OpenCLParallelCalcCustomNonbondedForceKernel::Task : public OpenCLContext::WorkTask { class OpenCLParallelCalcCustomNonbondedForceKernel::Task : public OpenCLContext::WorkTask {
public: public:
Task(ContextImpl& context, OpenCLCalcCustomNonbondedForceKernel& kernel, bool includeForce, Task(ContextImpl& context, OpenCLCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
...@@ -22,6 +22,26 @@ ...@@ -22,6 +22,26 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr; const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif #endif
real tempForce = 0; real tempForce = 0;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) { if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
...@@ -34,6 +54,13 @@ ...@@ -34,6 +54,13 @@
includeInteraction = false; includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w; tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
} }
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
} }
else { else {
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
...@@ -41,7 +68,8 @@ ...@@ -41,7 +68,8 @@
real sig2 = invR*sig; real sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
real sig6 = sig2*sig2*sig2; real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y); real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f); tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f); real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH #if USE_LJ_SWITCH
...@@ -53,6 +81,22 @@ ...@@ -53,6 +81,22 @@
ljEnergy *= switchValue; ljEnergy *= switchValue;
} }
#endif #endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction); tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
#else #else
......
__kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache, __kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache,
__global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const real4 scale = 1/(real) (PME_ORDER-1); const real4 scale = 1/(real) (PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER]; __local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
...@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4 ...@@ -33,7 +37,13 @@ __kernel void updateBsplines(__global const real4* restrict posq, __global real4
data[PME_ORDER-j-1] = scale*((dr+(real4) j)*data[PME_ORDER-j-2] + (-dr+(real4) (PME_ORDER-j))*data[PME_ORDER-j-1]); data[PME_ORDER-j-1] = scale*((dr+(real4) j)*data[PME_ORDER-j-2] + (-dr+(real4) (PME_ORDER-j))*data[PME_ORDER-j-1]);
data[0] = scale*(-dr+1.0f)*data[0]; data[0] = scale*(-dr+1.0f)*data[0];
for (int j = 0; j < PME_ORDER; j++) { for (int j = 0; j < PME_ORDER; j++) {
data[j].w = pos.w; // Storing the charge here improves cache coherency in the charge spreading kernel #ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
data[j].w = charge; // Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta[i+j*NUM_ATOMS] = data[j]; pmeBsplineTheta[i+j*NUM_ATOMS] = data[j];
} }
#endif #endif
...@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co ...@@ -86,7 +96,11 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const real scale = 1/(real) (PME_ORDER-1); const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
...@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -128,6 +142,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
// Spread the charge from this atom onto each grid point. // Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
for (int ix = 0; ix < PME_ORDER; ix++) { for (int ix = 0; ix < PME_ORDER; ix++) {
int xindex = gridIndex.x+ix; int xindex = gridIndex.x+ix;
xindex -= (xindex >= GRID_SIZE_X ? GRID_SIZE_X : 0); xindex -= (xindex >= GRID_SIZE_X ? GRID_SIZE_X : 0);
...@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -138,7 +158,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz; int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0); zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex; int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
real add = pos.w*data[ix].x*data[iy].y*data[iz].z; real add = charge*data[ix].x*data[iy].y*data[iz].z;
#ifdef USE_ALTERNATE_MEMORY_ACCESS_PATTERN #ifdef USE_ALTERNATE_MEMORY_ACCESS_PATTERN
// On Nvidia devices (at least Maxwell anyway), this split ordering produces much higher performance. Why? // On Nvidia devices (at least Maxwell anyway), this split ordering produces much higher performance. Why?
// I have no idea! And of course on AMD it produces slower performance. GPUs are not meant to be understood. // I have no idea! And of course on AMD it produces slower performance. GPUs are not meant to be understood.
...@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea ...@@ -167,7 +187,11 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
#elif defined(DEVICE_IS_CPU) #elif defined(DEVICE_IS_CPU)
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0); const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0); const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
if (firstx == lastx) if (firstx == lastx)
...@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -194,6 +218,12 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
// Spread the charge from this atom onto each grid point. // Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
bool hasComputedThetas = false; bool hasComputedThetas = false;
for (int ix = 0; ix < PME_ORDER; ix++) { for (int ix = 0; ix < PME_ORDER; ix++) {
int xindex = gridIndex.x+ix; int xindex = gridIndex.x+ix;
...@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -229,7 +259,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz; int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0); zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex; int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
pmeGrid[index] += EPSILON_FACTOR*pos.w*data[ix].x*data[iy].y*data[iz].z; pmeGrid[index] += EPSILON_FACTOR*charge*data[ix].x*data[iy].y*data[iz].z;
} }
} }
} }
...@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -237,7 +267,11 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
} }
#else #else
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta) { __global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) { for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
// Compute the charge on a grid point. // Compute the charge on a grid point.
...@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c ...@@ -298,7 +332,15 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half // R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1); const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z; const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) { for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices // real indices
...@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c ...@@ -317,11 +359,23 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
real bz = pmeBsplineModuliZ[kz]; real bz = pmeBsplineModuliZ[kz];
real2 grid = pmeGrid[index]; real2 grid = pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz; real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz; real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom; real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) { if (kx != 0 || ky != 0 || kz != 0) {
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm); pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
} }
#endif
} }
} }
...@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe ...@@ -330,7 +384,15 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half // R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -(2*M_PI/6)*SQRT(M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z; const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
#endif
mixed energy = 0; mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) { for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
...@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe ...@@ -349,8 +411,19 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
real bx = pmeBsplineModuliX[kx]; real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky]; real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz]; real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = erfc(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz; real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom; real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) { if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx); kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky); ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
...@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe ...@@ -358,11 +431,12 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
} }
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1)); int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = pmeGrid[indexInHalfComplexGrid]; real2 grid = pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 || kz != 0) { #ifndef USE_LJPME
if (kx != 0 || ky != 0 || kz != 0)
#endif
energy += eterm*(grid.x*grid.x + grid.y*grid.y); energy += eterm*(grid.x*grid.x + grid.y*grid.y);
} }
} #if defined(USE_PME_STREAM) && !defined(USE_LJPME)
#ifdef USE_PME_STREAM
energyBuffer[get_global_id(0)] = 0.5f*energy; energyBuffer[get_global_id(0)] = 0.5f*energy;
#else #else
energyBuffer[get_global_id(0)] += 0.5f*energy; energyBuffer[get_global_id(0)] += 0.5f*energy;
...@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe ...@@ -371,7 +445,11 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
__kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid, __kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX,
real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) { real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex
#ifdef USE_LJPME
, __global const float2* restrict sigmaEpsilon
#endif
) {
const real scale = 1/(real) (PME_ORDER-1); const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
real4 ddata[PME_ORDER]; real4 ddata[PME_ORDER];
...@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global ...@@ -436,7 +514,12 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
} }
} }
real4 totalForce = forceBuffers[atom]; real4 totalForce = forceBuffers[atom];
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
real q = pos.w*EPSILON_FACTOR; real q = pos.w*EPSILON_FACTOR;
#endif
totalForce.x -= q*(force.x*GRID_SIZE_X*recipBoxVecX.x); totalForce.x -= q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
totalForce.y -= q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y); totalForce.y -= q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
totalForce.z -= q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z); totalForce.z -= q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
...@@ -604,12 +604,21 @@ public: ...@@ -604,12 +604,21 @@ public:
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
int numParticles, num14; int numParticles, num14;
int **bonded14IndexArray; int **bonded14IndexArray;
RealOpenMM **particleParamArray, **bonded14ParamArray; RealOpenMM **particleParamArray, **bonded14ParamArray;
RealOpenMM nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, dispersionCoefficient; RealOpenMM nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, dispersionCoefficient;
int kmax[3], gridSize[3]; int kmax[3], gridSize[3], dispersionGridSize[3];
bool useSwitchingFunction; bool useSwitchingFunction;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
......
...@@ -38,14 +38,14 @@ class ReferenceLJCoulombIxn { ...@@ -38,14 +38,14 @@ class ReferenceLJCoulombIxn {
bool useSwitch; bool useSwitch;
bool periodic; bool periodic;
bool ewald; bool ewald;
bool pme; bool pme, ljpme;
const OpenMM::NeighborList* neighborList; const OpenMM::NeighborList* neighborList;
OpenMM::RealVec periodicBoxVectors[3]; OpenMM::RealVec periodicBoxVectors[3];
RealOpenMM cutoffDistance, switchingDistance; RealOpenMM cutoffDistance, switchingDistance;
RealOpenMM krf, crf; RealOpenMM krf, crf;
RealOpenMM alphaEwald; RealOpenMM alphaEwald, alphaDispersionEwald;
int numRx, numRy, numRz; int numRx, numRy, numRz;
int meshDim[3]; int meshDim[3], dispersionMeshDim[3];
// parameter indices // parameter indices
...@@ -149,6 +149,18 @@ class ReferenceLJCoulombIxn { ...@@ -149,6 +149,18 @@ class ReferenceLJCoulombIxn {
void setUsePME(RealOpenMM alpha, int meshSize[3]); void setUsePME(RealOpenMM alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param dalpha the dispersion Ewald separation parameter
@param dgridSize the dimensions of the dispersion mesh
--------------------------------------------------------------------------------------- */
void setUseLJPME(RealOpenMM dalpha, int dmeshSize[3]);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn Calculate LJ Coulomb pair ixn
......
...@@ -87,6 +87,28 @@ pme_exec(pme_t pme, ...@@ -87,6 +87,28 @@ pme_exec(pme_t pme,
RealOpenMM * energy); RealOpenMM * energy);
/*
* Evaluate reciprocal space PME dispersion energy and forces.
*
* Args:
*
* pme Opaque pme_t object, must have been initialized with pme_init()
* x Pointer to coordinate data array (nm)
* f Pointer to force data array (will be written as kJ/mol/nm)
* c6s Array of c6 coefficients (units of sqrt(kJ/mol).nm^3 )
* box Simulation cell dimensions (nm)
* energy Total energy (will be written in units of kJ/mol)
*/
int OPENMM_EXPORT
pme_exec_dpme(pme_t pme,
const std::vector<OpenMM::RealVec>& atomCoordinates,
std::vector<OpenMM::RealVec>& forces,
const std::vector<RealOpenMM>& c6s,
const OpenMM::RealVec periodicBoxVectors[3],
RealOpenMM * energy);
/* Release all memory in pme structure */ /* Release all memory in pme structure */
int OPENMM_EXPORT int OPENMM_EXPORT
......
...@@ -969,9 +969,17 @@ void ReferenceCalcNonbondedForceKernel::initialize(const System& system, const N ...@@ -969,9 +969,17 @@ void ReferenceCalcNonbondedForceKernel::initialize(const System& system, const N
} }
else if (nonbondedMethod == PME) { else if (nonbondedMethod == PME) {
double alpha; double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]); NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = (RealOpenMM) alpha; ewaldAlpha = (RealOpenMM) alpha;
} }
else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = (RealOpenMM) alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = (RealOpenMM) alpha;
useSwitchingFunction = false;
}
rfDielectric = (RealOpenMM)force.getReactionFieldDielectric(); rfDielectric = (RealOpenMM)force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection()) if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force); dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
...@@ -987,11 +995,12 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc ...@@ -987,11 +995,12 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc
bool periodic = (nonbondedMethod == CutoffPeriodic); bool periodic = (nonbondedMethod == CutoffPeriodic);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, extractBoxVectors(context), periodic || ewald || pme, nonbondedCutoff, 0.0); computeNeighborListVoxelHash(*neighborList, numParticles, posData, exclusions, extractBoxVectors(context), periodic || ewald || pme || ljpme, nonbondedCutoff, 0.0);
clj.setUseCutoff(nonbondedCutoff, *neighborList, rfDielectric); clj.setUseCutoff(nonbondedCutoff, *neighborList, rfDielectric);
} }
if (periodic || ewald || pme) { if (periodic || ewald || pme || ljpme) {
RealVec* boxVectors = extractBoxVectors(context); RealVec* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff; double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
...@@ -1002,6 +1011,10 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc ...@@ -1002,6 +1011,10 @@ double ReferenceCalcNonbondedForceKernel::execute(ContextImpl& context, bool inc
clj.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]); clj.setUseEwald(ewaldAlpha, kmax[0], kmax[1], kmax[2]);
if (pme) if (pme)
clj.setUsePME(ewaldAlpha, gridSize); clj.setUsePME(ewaldAlpha, gridSize);
if (ljpme){
clj.setUsePME(ewaldAlpha, gridSize);
clj.setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
if (useSwitchingFunction) if (useSwitchingFunction)
clj.setUseSwitchingFunction(switchingDistance); clj.setUseSwitchingFunction(switchingDistance);
clj.calculatePairIxn(numParticles, posData, particleParamArray, exclusions, 0, forceData, 0, includeEnergy ? &energy : NULL, includeDirect, includeReciprocal); clj.calculatePairIxn(numParticles, posData, particleParamArray, exclusions, 0, forceData, 0, includeEnergy ? &energy : NULL, includeDirect, includeReciprocal);
...@@ -1059,14 +1072,23 @@ void ReferenceCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& con ...@@ -1059,14 +1072,23 @@ void ReferenceCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& con
} }
void ReferenceCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { void ReferenceCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME) if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME"); throw OpenMMException("getPMEParametersInContext: This Context is not using PME or LJPME");
alpha = ewaldAlpha; alpha = ewaldAlpha;
nx = gridSize[0]; nx = gridSize[0];
ny = gridSize[1]; ny = gridSize[1];
nz = gridSize[2]; nz = gridSize[2];
} }
void ReferenceCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using LJPME");
alpha = ewaldDispersionAlpha;
nx = dispersionGridSize[0];
ny = dispersionGridSize[1];
nz = dispersionGridSize[2];
}
ReferenceCalcCustomNonbondedForceKernel::~ReferenceCalcCustomNonbondedForceKernel() { ReferenceCalcCustomNonbondedForceKernel::~ReferenceCalcCustomNonbondedForceKernel() {
disposeRealArray(particleParamArray, numParticles); disposeRealArray(particleParamArray, numParticles);
if (neighborList != NULL) if (neighborList != NULL)
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <sstream> #include <sstream>
#include <complex> #include <complex>
#include <algorithm> #include <algorithm>
#include <iostream>
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include "ReferenceLJCoulombIxn.h" #include "ReferenceLJCoulombIxn.h"
...@@ -47,7 +48,7 @@ using namespace OpenMM; ...@@ -47,7 +48,7 @@ using namespace OpenMM;
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
ReferenceLJCoulombIxn::ReferenceLJCoulombIxn() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false) { ReferenceLJCoulombIxn::ReferenceLJCoulombIxn() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false) {
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
...@@ -73,7 +74,7 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() { ...@@ -73,7 +74,7 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() {
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use a cutoff. Set the force to use a cutoff.
...@@ -83,14 +84,14 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() { ...@@ -83,14 +84,14 @@ ReferenceLJCoulombIxn::~ReferenceLJCoulombIxn() {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUseCutoff(RealOpenMM distance, const OpenMM::NeighborList& neighbors, RealOpenMM solventDielectric) { void ReferenceLJCoulombIxn::setUseCutoff(RealOpenMM distance, const OpenMM::NeighborList& neighbors, RealOpenMM solventDielectric) {
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
neighborList = &neighbors; neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0); krf = pow(cutoffDistance, -3.0)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0); crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -105,7 +106,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) { ...@@ -105,7 +106,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
switchingDistance = distance; switchingDistance = distance;
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff also been set, and the smallest side of the periodic box is at least twice the cutoff
...@@ -115,7 +116,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) { ...@@ -115,7 +116,7 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setPeriodic(OpenMM::RealVec* vectors) { void ReferenceLJCoulombIxn::setPeriodic(OpenMM::RealVec* vectors) {
assert(cutoff); assert(cutoff);
assert(vectors[0][0] >= 2.0*cutoffDistance); assert(vectors[0][0] >= 2.0*cutoffDistance);
...@@ -125,9 +126,9 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) { ...@@ -125,9 +126,9 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
periodicBoxVectors[0] = vectors[0]; periodicBoxVectors[0] = vectors[0];
periodicBoxVectors[1] = vectors[1]; periodicBoxVectors[1] = vectors[1];
periodicBoxVectors[2] = vectors[2]; periodicBoxVectors[2] = vectors[2];
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Ewald summation. Set the force to use Ewald summation.
...@@ -138,15 +139,15 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) { ...@@ -138,15 +139,15 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUseEwald(RealOpenMM alpha, int kmaxx, int kmaxy, int kmaxz) { void ReferenceLJCoulombIxn::setUseEwald(RealOpenMM alpha, int kmaxx, int kmaxy, int kmaxz) {
alphaEwald = alpha; alphaEwald = alpha;
numRx = kmaxx; numRx = kmaxx;
numRy = kmaxy; numRy = kmaxy;
numRz = kmaxz; numRz = kmaxz;
ewald = true; ewald = true;
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation. Set the force to use Particle-Mesh Ewald (PME) summation.
...@@ -155,13 +156,30 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) { ...@@ -155,13 +156,30 @@ void ReferenceLJCoulombIxn::setUseSwitchingFunction(RealOpenMM distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUsePME(RealOpenMM alpha, int meshSize[3]) { void ReferenceLJCoulombIxn::setUsePME(RealOpenMM alpha, int meshSize[3]) {
alphaEwald = alpha; alphaEwald = alpha;
meshDim[0] = meshSize[0]; meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1]; meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2]; meshDim[2] = meshSize[2];
pme = true; pme = true;
} }
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion terms.
@param alpha the dispersion Ewald separation parameter
@param gridSize the dimensions of the dispersion mesh
--------------------------------------------------------------------------------------- */
void ReferenceLJCoulombIxn::setUseLJPME(RealOpenMM alpha, int meshSize[3]) {
alphaDispersionEwald = alpha;
dispersionMeshDim[0] = meshSize[0];
dispersionMeshDim[1] = meshSize[1];
dispersionMeshDim[2] = meshSize[2];
ljpme = true;
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -201,16 +219,27 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -201,16 +219,27 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
RealOpenMM totalSelfEwaldEnergy = 0.0; RealOpenMM totalSelfEwaldEnergy = 0.0;
RealOpenMM realSpaceEwaldEnergy = 0.0; RealOpenMM realSpaceEwaldEnergy = 0.0;
RealOpenMM recipEnergy = 0.0; RealOpenMM recipEnergy = 0.0;
RealOpenMM recipDispersionEnergy = 0.0;
RealOpenMM totalRecipEnergy = 0.0; RealOpenMM totalRecipEnergy = 0.0;
RealOpenMM vdwEnergy = 0.0; RealOpenMM vdwEnergy = 0.0;
// ************************************************************************************** // A couple of sanity checks for
// SELF ENERGY if(ljpme && useSwitch)
// ************************************************************************************** throw OpenMMException("Switching cannot be used with LJPME");
if(ljpme && !pme)
throw OpenMMException("LJPME has been set, without PME being set");
// **************************************************************************************
// SELF ENERGY
// **************************************************************************************
if (includeReciprocal) { if (includeReciprocal) {
for (int atomID = 0; atomID < numberOfAtoms; atomID++) { for (int atomID = 0; atomID < numberOfAtoms; atomID++) {
RealOpenMM selfEwaldEnergy = (RealOpenMM) (ONE_4PI_EPS0*atomParameters[atomID][QIndex]*atomParameters[atomID][QIndex] * alphaEwald/SQRT_PI); RealOpenMM selfEwaldEnergy = (RealOpenMM) (ONE_4PI_EPS0*atomParameters[atomID][QIndex]*atomParameters[atomID][QIndex] * alphaEwald/SQRT_PI);
if(ljpme) {
// Dispersion self term
selfEwaldEnergy -= pow(alphaDispersionEwald, 6.0) * 64.0*pow(atomParameters[atomID][SigIndex], 6.0) * pow(atomParameters[atomID][EpsIndex], 2.0) / 12.0;
}
totalSelfEwaldEnergy -= selfEwaldEnergy; totalSelfEwaldEnergy -= selfEwaldEnergy;
if (energyByAtom) { if (energyByAtom) {
energyByAtom[atomID] -= selfEwaldEnergy; energyByAtom[atomID] -= selfEwaldEnergy;
...@@ -222,9 +251,9 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -222,9 +251,9 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
*totalEnergy += totalSelfEwaldEnergy; *totalEnergy += totalSelfEwaldEnergy;
} }
// ************************************************************************************** // **************************************************************************************
// RECIPROCAL SPACE EWALD ENERGY AND FORCES // RECIPROCAL SPACE EWALD ENERGY AND FORCES
// ************************************************************************************** // **************************************************************************************
// PME // PME
if (pme && includeReciprocal) { if (pme && includeReciprocal) {
...@@ -245,8 +274,31 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -245,8 +274,31 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
energyByAtom[n] += recipEnergy; energyByAtom[n] += recipEnergy;
pme_destroy(pmedata); pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<RealVec> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = 8.0*pow(atomParameters[i][SigIndex], 3.0) * atomParameters[i][EpsIndex];
dpmeforces.push_back(RealVec());
}
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
} }
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
if (energyByAtom)
for (int n = 0; n < numberOfAtoms; n++)
energyByAtom[n] += recipDispersionEnergy;
pme_destroy(pmedata);
}
}
// Ewald method // Ewald method
else if (ewald && includeReciprocal) { else if (ewald && includeReciprocal) {
...@@ -258,7 +310,7 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -258,7 +310,7 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
// setup K-vectors // setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z] #define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3); vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms); vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms); vector<d_complex> tab_qxyz(numberOfAtoms);
...@@ -350,15 +402,16 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -350,15 +402,16 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
} }
} }
// ************************************************************************************** // **************************************************************************************
// SHORT-RANGE ENERGY AND FORCES // SHORT-RANGE ENERGY AND FORCES
// ************************************************************************************** // **************************************************************************************
if (!includeDirect) if (!includeDirect)
return; return;
RealOpenMM totalVdwEnergy = 0.0f; RealOpenMM totalVdwEnergy = 0.0f;
RealOpenMM totalRealSpaceEwaldEnergy = 0.0f; RealOpenMM totalRealSpaceEwaldEnergy = 0.0f;
for (int i = 0; i < (int) neighborList->size(); i++) { for (int i = 0; i < (int) neighborList->size(); i++) {
OpenMM::AtomPair pair = (*neighborList)[i]; OpenMM::AtomPair pair = (*neighborList)[i];
int ii = pair.first; int ii = pair.first;
...@@ -387,6 +440,37 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -387,6 +440,37 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
RealOpenMM eps = atomParameters[ii][EpsIndex]*atomParameters[jj][EpsIndex]; RealOpenMM eps = atomParameters[ii][EpsIndex]*atomParameters[jj][EpsIndex];
dEdR += switchValue*eps*(twelve*sig6 - six)*sig6*inverseR*inverseR; dEdR += switchValue*eps*(twelve*sig6 - six)*sig6*inverseR*inverseR;
vdwEnergy = eps*(sig6-one)*sig6; vdwEnergy = eps*(sig6-one)*sig6;
if (ljpme) {
RealOpenMM dalphaR = alphaDispersionEwald * r;
RealOpenMM dar2 = dalphaR*dalphaR;
RealOpenMM dar4 = dar2*dar2;
RealOpenMM dar6 = dar4*dar2;
RealOpenMM inverseR2 = inverseR*inverseR;
RealOpenMM c6i = 8.0*pow(atomParameters[ii][SigIndex], 3.0) * atomParameters[ii][EpsIndex];
RealOpenMM c6j = 8.0*pow(atomParameters[jj][SigIndex], 3.0) * atomParameters[jj][EpsIndex];
// For the energies and forces, we first add the regular Lorentz−Berthelot terms. The C12 term is treated as usual
// but we then subtract out (remembering that the C6 term is negative) the multiplicative C6 term that has been
// computed in real space. Finally, we add a potential shift term to account for the difference between the LB
// and multiplicative functional forms at the cutoff.
RealOpenMM emult = c6i*c6j*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4));
dEdR += 6.0*c6i*c6j*inverseR2*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
RealOpenMM inverseCut2 = 1.0/(cutoffDistance*cutoffDistance);
RealOpenMM inverseCut6 = inverseCut2*inverseCut2*inverseCut2;
sig2 = atomParameters[ii][SigIndex] + atomParameters[jj][SigIndex];
sig2 *= sig2;
sig6 = sig2*sig2*sig2;
// The additive part of the potential shift
RealOpenMM potentialshift = eps*(one-sig6*inverseCut6)*sig6*inverseCut6;
dalphaR = alphaDispersionEwald * cutoffDistance;
dar2 = dalphaR*dalphaR;
dar4 = dar2*dar2;
// The multiplicative part of the potential shift
potentialshift -= c6i*c6j*inverseCut6*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4));
vdwEnergy += emult + potentialshift;
}
if (useSwitch) { if (useSwitch) {
dEdR -= vdwEnergy*switchDeriv*inverseR; dEdR -= vdwEnergy*switchDeriv*inverseR;
vdwEnergy *= switchValue; vdwEnergy *= switchValue;
...@@ -452,6 +536,24 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec> ...@@ -452,6 +536,24 @@ void ReferenceLJCoulombIxn::calculateEwaldIxn(int numberOfAtoms, vector<RealVec>
realSpaceEwaldEnergy = (RealOpenMM) (alphaEwald*TWO_OVER_SQRT_PI*ONE_4PI_EPS0*atomParameters[ii][QIndex]*atomParameters[jj][QIndex]); realSpaceEwaldEnergy = (RealOpenMM) (alphaEwald*TWO_OVER_SQRT_PI*ONE_4PI_EPS0*atomParameters[ii][QIndex]*atomParameters[jj][QIndex]);
} }
if(ljpme){
// Dispersion terms. Here we just back out the reciprocal space terms, and don't add any extra real space terms.
RealOpenMM dalphaR = alphaDispersionEwald * r;
RealOpenMM inverseR2 = inverseR*inverseR;
RealOpenMM dar2 = dalphaR*dalphaR;
RealOpenMM dar4 = dar2*dar2;
RealOpenMM dar6 = dar4*dar2;
RealOpenMM c6i = 8.0*pow(atomParameters[ii][SigIndex], 3.0) * atomParameters[ii][EpsIndex];
RealOpenMM c6j = 8.0*pow(atomParameters[jj][SigIndex], 3.0) * atomParameters[jj][EpsIndex];
realSpaceEwaldEnergy -= c6i*c6j*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4));
RealOpenMM dEdR = -6.0*c6i*c6j*inverseR2*inverseR2*inverseR2*inverseR2*(1.0 - EXP(-dar2) * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
for (int kk = 0; kk < 3; kk++) {
RealOpenMM force = dEdR*deltaR[0][kk];
forces[ii][kk] -= force;
forces[jj][kk] += force;
}
}
totalExclusionEnergy += realSpaceEwaldEnergy; totalExclusionEnergy += realSpaceEwaldEnergy;
if (energyByAtom) { if (energyByAtom) {
energyByAtom[ii] -= realSpaceEwaldEnergy; energyByAtom[ii] -= realSpaceEwaldEnergy;
...@@ -488,7 +590,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>& ...@@ -488,7 +590,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>&
RealOpenMM* fixedParameters, vector<RealVec>& forces, RealOpenMM* fixedParameters, vector<RealVec>& forces,
RealOpenMM* energyByAtom, RealOpenMM* totalEnergy, bool includeDirect, bool includeReciprocal) const { RealOpenMM* energyByAtom, RealOpenMM* totalEnergy, bool includeDirect, bool includeReciprocal) const {
if (ewald || pme) { if (ewald || pme || ljpme) {
calculateEwaldIxn(numberOfAtoms, atomCoordinates, atomParameters, exclusions, fixedParameters, forces, energyByAtom, calculateEwaldIxn(numberOfAtoms, atomCoordinates, atomParameters, exclusions, fixedParameters, forces, energyByAtom,
totalEnergy, includeDirect, includeReciprocal); totalEnergy, includeDirect, includeReciprocal);
return; return;
...@@ -512,7 +614,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>& ...@@ -512,7 +614,7 @@ void ReferenceLJCoulombIxn::calculatePairIxn(int numberOfAtoms, vector<RealVec>&
} }
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms Calculate LJ Coulomb pair ixn between two atoms
...@@ -608,5 +710,5 @@ void ReferenceLJCoulombIxn::calculateOneIxn(int ii, int jj, vector<RealVec>& ato ...@@ -608,5 +710,5 @@ void ReferenceLJCoulombIxn::calculateOneIxn(int ii, int jj, vector<RealVec>& ato
energyByAtom[ii] += energy; energyByAtom[ii] += energy;
energyByAtom[jj] += energy; energyByAtom[jj] += energy;
} }
} }
...@@ -513,6 +513,106 @@ pme_reciprocal_convolution(pme_t pme, ...@@ -513,6 +513,106 @@ pme_reciprocal_convolution(pme_t pme,
} }
static void
dpme_reciprocal_convolution(pme_t pme,
const RealVec periodicBoxVectors[3],
const RealVec recipBoxVectors[3],
RealOpenMM * energy)
{
int kx,ky,kz;
int nx,ny,nz;
RealOpenMM mx,my,mz;
RealOpenMM mhx,mhy,mhz,m2;
RealOpenMM bx,by,bz;
RealOpenMM d1,d2;
RealOpenMM eterm,struct2,ets2;
RealOpenMM esum;
RealOpenMM denom;
RealOpenMM boxfactor;
RealOpenMM maxkx,maxky,maxkz;
t_complex *ptr;
nx = pme->ngrid[0];
ny = pme->ngrid[1];
nz = pme->ngrid[2];
boxfactor = (RealOpenMM) M_PI*sqrt(M_PI) / (6.0*periodicBoxVectors[0][0]*periodicBoxVectors[1][1]*periodicBoxVectors[2][2]);
esum = 0;
maxkx = (RealOpenMM) ((nx+1)/2);
maxky = (RealOpenMM) ((ny+1)/2);
maxkz = (RealOpenMM) ((nz+1)/2);
RealOpenMM bfac = M_PI / pme->ewaldcoeff;
RealOpenMM fac1 = 2.0*M_PI*M_PI*M_PI*sqrt(M_PI);
RealOpenMM fac2 = pme->ewaldcoeff*pme->ewaldcoeff*pme->ewaldcoeff;
RealOpenMM fac3 = -2.0*pme->ewaldcoeff*M_PI*M_PI;
RealOpenMM b, m, m3, expfac, expterm, erfcterm;
for (kx=0;kx<nx;kx++)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mx = (RealOpenMM) ((kx<maxkx) ? kx : (kx-nx));
mhx = mx*recipBoxVectors[0][0];
bx = pme->bsplines_moduli[0][kx];
for (ky=0;ky<ny;ky++)
{
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
my = (RealOpenMM) ((ky<maxky) ? ky : (ky-ny));
mhy = mx*recipBoxVectors[1][0]+my*recipBoxVectors[1][1];
by = pme->bsplines_moduli[1][ky];
for (kz=0;kz<nz;kz++)
{
/*
* Unlike the Coulombic case, there's an m=0 term so all terms are considered here.
*/
/* Calculate frequency. Grid indices in the upper half correspond to negative frequencies! */
mz = (RealOpenMM) ((kz<maxkz) ? kz : (kz-nz));
mhz = mx*recipBoxVectors[2][0]+my*recipBoxVectors[2][1]+mz*recipBoxVectors[2][2];
/* Pointer to the grid cell in question */
ptr = pme->grid + kx*ny*nz + ky*nz + kz;
/* Get grid data for this frequency */
d1 = ptr->re;
d2 = ptr->im;
/* Calculate the convolution - see the Essman/Darden paper for the equation! */
m2 = mhx*mhx+mhy*mhy+mhz*mhz;
bz = pme->bsplines_moduli[2][kz];
denom = boxfactor / (bx*by*bz);
m = sqrt(m2);
m3 = m*m2;
b = bfac*m;
expfac = -b*b;
erfcterm = erfc(b);
expterm = exp(expfac);
eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
/* write back convolution data to grid */
ptr->re = d1*eterm;
ptr->im = d2*eterm;
struct2 = (d1*d1+d2*d2);
/* Long-range PME contribution to the energy for this frequency */
ets2 = eterm*struct2;
esum += ets2;
}
}
}
// Remember the C6 energy is attractive, hence the negative sign.
*energy = (RealOpenMM) (-esum);
}
static void static void
pme_grid_interpolate_force(pme_t pme, pme_grid_interpolate_force(pme_t pme,
const RealVec recipBoxVectors[3], const RealVec recipBoxVectors[3],
...@@ -704,6 +804,49 @@ int pme_exec(pme_t pme, ...@@ -704,6 +804,49 @@ int pme_exec(pme_t pme,
} }
int pme_exec_dpme(pme_t pme,
const vector<RealVec>& atomCoordinates,
vector<RealVec>& forces,
const vector<RealOpenMM>& c6s,
const RealVec periodicBoxVectors[3],
RealOpenMM* energy)
{
/* Routine is called with coordinates in x, a box, and charges in q */
RealVec recipBoxVectors[3];
invert_box_vectors(periodicBoxVectors, recipBoxVectors);
/* Before we can do the actual interpolation, we need to recalculate and update
* the indices for each particle in the charge grid (initialized in pme_init()),
* and what its fractional offset in this grid cell is.
*/
/* Update charge grid indices and fractional offsets for each atom.
* The indices/fractions are stored internally in the pme datatype
*/
pme_update_grid_index_and_fraction(pme,atomCoordinates,periodicBoxVectors,recipBoxVectors);
/* Calculate bsplines (and their differentials) from current fractional coordinates, store in pme structure */
pme_update_bsplines(pme);
/* Spread the charges on grid (using newly calculated bsplines in the pme structure) */
pme_grid_spread_charge(pme, c6s);
/* do 3d-fft */
fftpack_exec_3d(pme->fftplan,FFTPACK_FORWARD,pme->grid,pme->grid);
/* solve in k-space */
dpme_reciprocal_convolution(pme,periodicBoxVectors,recipBoxVectors,energy);
/* do 3d-invfft */
fftpack_exec_3d(pme->fftplan,FFTPACK_BACKWARD,pme->grid,pme->grid);
/* Get the particle forces from the grid and bsplines in the pme structure */
pme_grid_interpolate_force(pme,recipBoxVectors,c6s,forces);
return 0;
}
int int
pme_destroy(pme_t pme) pme_destroy(pme_t pme)
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "ReferenceTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
...@@ -1155,7 +1155,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1155,7 +1155,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
NonbondedForce nb; NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance()); nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
nb.setCutoffDistance(force.getCutoffDistance()); nb.setCutoffDistance(force.getCutoffDistance());
NonbondedForceImpl::calcPMEParameters(system, nb, alpha, gridSizeX, gridSizeY, gridSizeZ); NonbondedForceImpl::calcPMEParameters(system, nb, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX); gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY); gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ); gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment